From ba25d977571e1551b7032d6104e49efd6f88f8ad Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Thu, 9 Jul 2020 15:09:56 +0800 Subject: [PATCH 001/290] drm/i915/gvt: Do not destroy ppgtt_mm during vGPU D3->D0. When system enters S3 state, device enters D3 state while RAM remains powered. From vGPU/GVT perspective, ppgtt_mm is residual in guest memory during vGPU in D3 state, so that when guest state transits from S3->S0, ppgtt_mm can be re-used and no need rebuild. Previous implementation invalidate and destroy ppgtt_mm at DMLR, regardless the power state transition is S0->S3->S0 (guest suspend or resume) or OFF->S0 (normal boot/reboot), invalidate and destroy ppgtt_mm is unnecessary in the former transition case. The patch saves the vGPU D3/D0 transition state when guest writes the PCI_PM_CTRL in vGPU's configure space, then in later DMLR, GVT can decide whether or not invalidate and destroy ppgtt_mm is required. The d3_entered flags is reset after DMLR. To test this feature, make sure S3 is enabled in QEMU parameters: i440fx: PIIX4_PM.disable_s3=0 q35: ICH9-LPC.disable_s3=0 Also need enable sleep option in guest OS if it's disabled. v2: - Revise commit message to more accurate description. (Kevin) - Split patch by logic. (Zhenyu) Reviewed-by: Zhenyu Wang Signed-off-by: Hang Yuan Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200709071002.247960-2-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/cfg_space.c | 24 ++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/gtt.c | 2 +- drivers/gpu/drm/i915/gvt/gtt.h | 2 ++ drivers/gpu/drm/i915/gvt/gvt.h | 3 +++ drivers/gpu/drm/i915/gvt/vgpu.c | 17 +++++++++++++++-- 5 files changed, 45 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 072725a448db..ad86c5eb5bba 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -70,6 +70,7 @@ static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off, { u8 *cfg_base = vgpu_cfg_space(vgpu); u8 mask, new, old; + pci_power_t pwr; int i = 0; for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) { @@ -91,6 +92,15 @@ static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off, /* For other configuration space directly copy as it is. */ if (i < bytes) memcpy(cfg_base + off + i, src + i, bytes - i); + + if (off == vgpu->cfg_space.pmcsr_off && vgpu->cfg_space.pmcsr_off) { + pwr = (pci_power_t __force)(*(u16*)(&vgpu_cfg_space(vgpu)[off]) + & PCI_PM_CTRL_STATE_MASK); + if (pwr == PCI_D3hot) + vgpu->d3_entered = true; + gvt_dbg_core("vgpu-%d power status changed to %d\n", + vgpu->id, pwr); + } } /** @@ -366,6 +376,7 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, struct intel_gvt *gvt = vgpu->gvt; const struct intel_gvt_device_info *info = &gvt->device_info; u16 *gmch_ctl; + u8 next; memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space, info->cfg_space_size); @@ -401,6 +412,19 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, pci_resource_len(gvt->gt->i915->drm.pdev, 2); memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4); + + /* PM Support */ + vgpu->cfg_space.pmcsr_off = 0; + if (vgpu_cfg_space(vgpu)[PCI_STATUS] & PCI_STATUS_CAP_LIST) { + next = vgpu_cfg_space(vgpu)[PCI_CAPABILITY_LIST]; + do { + if (vgpu_cfg_space(vgpu)[next + PCI_CAP_LIST_ID] == PCI_CAP_ID_PM) { + vgpu->cfg_space.pmcsr_off = next + PCI_PM_CTRL; + break; + } + next = vgpu_cfg_space(vgpu)[next + PCI_CAP_LIST_NEXT]; + } while (next); + } } /** diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 210016192ce7..a3a4305eda01 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2501,7 +2501,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) return create_scratch_page_tree(vgpu); } -static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) +void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) { struct list_head *pos, *n; struct intel_vgpu_mm *mm; diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 320b8d6ad92f..52d0d88abd86 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -279,4 +279,6 @@ int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); +void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu); + #endif /* _GVT_GTT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index a4a6db6b7f90..ff7f2515a6fe 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -106,6 +106,7 @@ struct intel_vgpu_pci_bar { struct intel_vgpu_cfg_space { unsigned char virtual_cfg_space[PCI_CFG_SPACE_EXP_SIZE]; struct intel_vgpu_pci_bar bar[INTEL_GVT_MAX_BAR_NUM]; + u32 pmcsr_off; }; #define vgpu_cfg_space(vgpu) ((vgpu)->cfg_space.virtual_cfg_space) @@ -198,6 +199,8 @@ struct intel_vgpu { struct intel_vgpu_submission submission; struct radix_tree_root page_track_tree; u32 hws_pga[I915_NUM_ENGINES]; + /* Set on PCI_D3, reset on DMLR, not reflecting the actual PM state */ + bool d3_entered; struct dentry *debugfs; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 7d361623ff67..fb12448fe353 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -257,6 +257,7 @@ void intel_gvt_release_vgpu(struct intel_vgpu *vgpu) intel_gvt_deactivate_vgpu(vgpu); mutex_lock(&vgpu->vgpu_lock); + vgpu->d3_entered = false; intel_vgpu_clean_workloads(vgpu, ALL_ENGINES); intel_vgpu_dmabuf_cleanup(vgpu); mutex_unlock(&vgpu->vgpu_lock); @@ -393,6 +394,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL); idr_init(&vgpu->object_idr); intel_vgpu_init_cfg_space(vgpu, param->primary); + vgpu->d3_entered = false; ret = intel_vgpu_init_mmio(vgpu); if (ret) @@ -557,10 +559,15 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, /* full GPU reset or device model level reset */ if (engine_mask == ALL_ENGINES || dmlr) { intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); - intel_vgpu_invalidate_ppgtt(vgpu); + if (engine_mask == ALL_ENGINES) + intel_vgpu_invalidate_ppgtt(vgpu); /*fence will not be reset during virtual reset */ if (dmlr) { - intel_vgpu_reset_gtt(vgpu); + if(!vgpu->d3_entered) { + intel_vgpu_invalidate_ppgtt(vgpu); + intel_vgpu_destroy_all_ppgtt_mm(vgpu); + } + intel_vgpu_reset_ggtt(vgpu, true); intel_vgpu_reset_resource(vgpu); } @@ -573,6 +580,12 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; vgpu->pv_notified = false; + /* + * PCI_D0 is set before dmlr, so reset d3_entered here + * after done using. + */ + if(vgpu->d3_entered) + vgpu->d3_entered = false; } } From 9e7c0efadb86ddb58965561bbca638d44792d78f Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Thu, 9 Jul 2020 15:09:57 +0800 Subject: [PATCH 002/290] drm/i915/gvt: Do not reset pv_notified when vGPU transit from D3->D0 Unlike full initialization like normal boot, guest driver won't pv_notified GVT when vGPU transit from D3->D0. If pv_notified is reset, later vGPU operations will trigger enter into failsafe mode. Considering the fact that vGPU will at least notify GVT pv_notified once before D3/D0 transition, it's safe to skip reset pv_notified in D3->D0. To test this feature, make sure S3 is enabled in QEMU parameters: i440fx: PIIX4_PM.disable_s3=0 q35: ICH9-LPC.disable_s3=0 Also need enable sleep option in guest OS if it's disabled. v2: - Revise commit message to more accurate description. (Kevin) - Split patch by logic. (Zhenyu) Reviewed-by: Zhenyu Wang Signed-off-by: Hang Yuan Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200709071002.247960-3-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/vgpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index fb12448fe353..8fa9b31a2484 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -579,13 +579,14 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, intel_vgpu_reset_cfg_space(vgpu); /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; - vgpu->pv_notified = false; /* * PCI_D0 is set before dmlr, so reset d3_entered here * after done using. */ if(vgpu->d3_entered) vgpu->d3_entered = false; + else + vgpu->pv_notified = false; } } From e4d7f2d3593e703a4d58b813f332a5c2aef5106f Mon Sep 17 00:00:00 2001 From: Keyur Patel Date: Wed, 10 Jun 2020 23:19:46 -0400 Subject: [PATCH 003/290] ext4: fix spelling mistakes in extents.c Fix spelling issues over the comments in the code. requsted ==> requested deterimined ==> determined insde ==> inside neet ==> need somthing ==> something Signed-off-by: Keyur Patel Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200611031947.165079-1-iamkeyur96@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 221f240eae60..1106ee17aa21 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1915,7 +1915,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, /* * ext4_ext_insert_extent: - * tries to merge requsted extent into the existing extent or + * tries to merge requested extent into the existing extent or * inserts requested extent as new one into the tree, * creating new leaf in the no-space case. */ @@ -3125,7 +3125,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * * * Splits extent [a, b] into two extents [a, @split) and [@split, b], states - * of which are deterimined by split_flag. + * of which are determined by split_flag. * * There are two cases: * a> the extent are splitted into two extent. @@ -3650,7 +3650,7 @@ static int ext4_split_convert_extents(handle_t *handle, eof_block = map->m_lblk + map->m_len; /* * It is safe to convert extent to initialized via explicit - * zeroout only if extent is fully insde i_size or new_size. + * zeroout only if extent is fully inside i_size or new_size. */ depth = ext_depth(inode); ex = path[depth].p_ext; @@ -4495,7 +4495,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, } /* - * Round up offset. This is not fallocate, we neet to zero out + * Round up offset. This is not fallocate, we need to zero out * blocks, so convert interior block aligned part of the range to * unwritten and possibly manually zero out unaligned parts of the * range. @@ -5579,7 +5579,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, } ex1 = path1[path1->p_depth].p_ext; ex2 = path2[path2->p_depth].p_ext; - /* Do we have somthing to swap ? */ + /* Do we have something to swap ? */ if (unlikely(!ex2 || !ex1)) goto finish; From f36e8edb95734c03134db628afa25ee23b8e0d95 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Mon, 3 Aug 2020 10:13:31 +0800 Subject: [PATCH 004/290] ASoC: fsl-asoc-card: Remove fsl_asoc_card_set_bias_level function With this case: aplay -Dhw:x 16khz.wav 24khz.wav There is sound distortion for 24khz.wav. The reason is that setting PLL of WM8962 with set_bias_level function, the bias level is not changed when 24khz.wav is played, then the PLL won't be reset, the clock is not correct, so distortion happens. The resolution of this issue is to remove fsl_asoc_card_set_bias_level. Move PLL configuration to hw_params and hw_free. After removing fsl_asoc_card_set_bias_level, also test WM8960 case, it can work. Fixes: 708b4351f08c ("ASoC: fsl: Add Freescale Generic ASoC Sound Card with ASRC support") Signed-off-by: Shengjiu Wang Acked-by: Nicolin Chen Link: https://lore.kernel.org/r/1596420811-16690-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl-asoc-card.c | 154 ++++++++++++++++------------------ 1 file changed, 70 insertions(+), 84 deletions(-) diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index de136c0a497d..52adedc03245 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -73,6 +73,7 @@ struct cpu_priv { * @codec_priv: CODEC private data * @cpu_priv: CPU private data * @card: ASoC card structure + * @streams: Mask of current active streams * @sample_rate: Current sample rate * @sample_format: Current sample format * @asrc_rate: ASRC sample rate used by Back-Ends @@ -89,6 +90,7 @@ struct fsl_asoc_card_priv { struct codec_priv codec_priv; struct cpu_priv cpu_priv; struct snd_soc_card card; + u8 streams; u32 sample_rate; snd_pcm_format_t sample_format; u32 asrc_rate; @@ -151,21 +153,17 @@ static int fsl_asoc_card_hw_params(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct fsl_asoc_card_priv *priv = snd_soc_card_get_drvdata(rtd->card); bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; + struct codec_priv *codec_priv = &priv->codec_priv; struct cpu_priv *cpu_priv = &priv->cpu_priv; struct device *dev = rtd->card->dev; + unsigned int pll_out; int ret; priv->sample_rate = params_rate(params); priv->sample_format = params_format(params); + priv->streams |= BIT(substream->stream); - /* - * If codec-dai is DAI Master and all configurations are already in the - * set_bias_level(), bypass the remaining settings in hw_params(). - * Note: (dai_fmt & CBM_CFM) includes CBM_CFM and CBM_CFS. - */ - if ((priv->card.set_bias_level && - priv->dai_fmt & SND_SOC_DAIFMT_CBM_CFM) || - fsl_asoc_card_is_ac97(priv)) + if (fsl_asoc_card_is_ac97(priv)) return 0; /* Specific configurations of DAIs starts from here */ @@ -174,7 +172,7 @@ static int fsl_asoc_card_hw_params(struct snd_pcm_substream *substream, cpu_priv->sysclk_dir[tx]); if (ret && ret != -ENOTSUPP) { dev_err(dev, "failed to set sysclk for cpu dai\n"); - return ret; + goto fail; } if (cpu_priv->slot_width) { @@ -182,6 +180,68 @@ static int fsl_asoc_card_hw_params(struct snd_pcm_substream *substream, cpu_priv->slot_width); if (ret && ret != -ENOTSUPP) { dev_err(dev, "failed to set TDM slot for cpu dai\n"); + goto fail; + } + } + + /* Specific configuration for PLL */ + if (codec_priv->pll_id && codec_priv->fll_id) { + if (priv->sample_format == SNDRV_PCM_FORMAT_S24_LE) + pll_out = priv->sample_rate * 384; + else + pll_out = priv->sample_rate * 256; + + ret = snd_soc_dai_set_pll(asoc_rtd_to_codec(rtd, 0), + codec_priv->pll_id, + codec_priv->mclk_id, + codec_priv->mclk_freq, pll_out); + if (ret) { + dev_err(dev, "failed to start FLL: %d\n", ret); + goto fail; + } + + ret = snd_soc_dai_set_sysclk(asoc_rtd_to_codec(rtd, 0), + codec_priv->fll_id, + pll_out, SND_SOC_CLOCK_IN); + + if (ret && ret != -ENOTSUPP) { + dev_err(dev, "failed to set SYSCLK: %d\n", ret); + goto fail; + } + } + + return 0; + +fail: + priv->streams &= ~BIT(substream->stream); + return ret; +} + +static int fsl_asoc_card_hw_free(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct fsl_asoc_card_priv *priv = snd_soc_card_get_drvdata(rtd->card); + struct codec_priv *codec_priv = &priv->codec_priv; + struct device *dev = rtd->card->dev; + int ret; + + priv->streams &= ~BIT(substream->stream); + + if (!priv->streams && codec_priv->pll_id && codec_priv->fll_id) { + /* Force freq to be 0 to avoid error message in codec */ + ret = snd_soc_dai_set_sysclk(asoc_rtd_to_codec(rtd, 0), + codec_priv->mclk_id, + 0, + SND_SOC_CLOCK_IN); + if (ret) { + dev_err(dev, "failed to switch away from FLL: %d\n", ret); + return ret; + } + + ret = snd_soc_dai_set_pll(asoc_rtd_to_codec(rtd, 0), + codec_priv->pll_id, 0, 0, 0); + if (ret && ret != -ENOTSUPP) { + dev_err(dev, "failed to stop FLL: %d\n", ret); return ret; } } @@ -191,6 +251,7 @@ static int fsl_asoc_card_hw_params(struct snd_pcm_substream *substream, static const struct snd_soc_ops fsl_asoc_card_ops = { .hw_params = fsl_asoc_card_hw_params, + .hw_free = fsl_asoc_card_hw_free, }; static int be_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, @@ -254,75 +315,6 @@ static struct snd_soc_dai_link fsl_asoc_card_dai[] = { }, }; -static int fsl_asoc_card_set_bias_level(struct snd_soc_card *card, - struct snd_soc_dapm_context *dapm, - enum snd_soc_bias_level level) -{ - struct fsl_asoc_card_priv *priv = snd_soc_card_get_drvdata(card); - struct snd_soc_pcm_runtime *rtd; - struct snd_soc_dai *codec_dai; - struct codec_priv *codec_priv = &priv->codec_priv; - struct device *dev = card->dev; - unsigned int pll_out; - int ret; - - rtd = snd_soc_get_pcm_runtime(card, &card->dai_link[0]); - codec_dai = asoc_rtd_to_codec(rtd, 0); - if (dapm->dev != codec_dai->dev) - return 0; - - switch (level) { - case SND_SOC_BIAS_PREPARE: - if (dapm->bias_level != SND_SOC_BIAS_STANDBY) - break; - - if (priv->sample_format == SNDRV_PCM_FORMAT_S24_LE) - pll_out = priv->sample_rate * 384; - else - pll_out = priv->sample_rate * 256; - - ret = snd_soc_dai_set_pll(codec_dai, codec_priv->pll_id, - codec_priv->mclk_id, - codec_priv->mclk_freq, pll_out); - if (ret) { - dev_err(dev, "failed to start FLL: %d\n", ret); - return ret; - } - - ret = snd_soc_dai_set_sysclk(codec_dai, codec_priv->fll_id, - pll_out, SND_SOC_CLOCK_IN); - if (ret && ret != -ENOTSUPP) { - dev_err(dev, "failed to set SYSCLK: %d\n", ret); - return ret; - } - break; - - case SND_SOC_BIAS_STANDBY: - if (dapm->bias_level != SND_SOC_BIAS_PREPARE) - break; - - ret = snd_soc_dai_set_sysclk(codec_dai, codec_priv->mclk_id, - codec_priv->mclk_freq, - SND_SOC_CLOCK_IN); - if (ret && ret != -ENOTSUPP) { - dev_err(dev, "failed to switch away from FLL: %d\n", ret); - return ret; - } - - ret = snd_soc_dai_set_pll(codec_dai, codec_priv->pll_id, 0, 0, 0); - if (ret) { - dev_err(dev, "failed to stop FLL: %d\n", ret); - return ret; - } - break; - - default: - break; - } - - return 0; -} - static int fsl_asoc_card_audmux_init(struct device_node *np, struct fsl_asoc_card_priv *priv) { @@ -611,7 +603,6 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) /* Diversify the card configurations */ if (of_device_is_compatible(np, "fsl,imx-audio-cs42888")) { codec_dai_name = "cs42888"; - priv->card.set_bias_level = NULL; priv->cpu_priv.sysclk_freq[TX] = priv->codec_priv.mclk_freq; priv->cpu_priv.sysclk_freq[RX] = priv->codec_priv.mclk_freq; priv->cpu_priv.sysclk_dir[TX] = SND_SOC_CLOCK_OUT; @@ -628,26 +619,22 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; } else if (of_device_is_compatible(np, "fsl,imx-audio-wm8962")) { codec_dai_name = "wm8962"; - priv->card.set_bias_level = fsl_asoc_card_set_bias_level; priv->codec_priv.mclk_id = WM8962_SYSCLK_MCLK; priv->codec_priv.fll_id = WM8962_SYSCLK_FLL; priv->codec_priv.pll_id = WM8962_FLL; priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; } else if (of_device_is_compatible(np, "fsl,imx-audio-wm8960")) { codec_dai_name = "wm8960-hifi"; - priv->card.set_bias_level = fsl_asoc_card_set_bias_level; priv->codec_priv.fll_id = WM8960_SYSCLK_AUTO; priv->codec_priv.pll_id = WM8960_SYSCLK_AUTO; priv->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM; } else if (of_device_is_compatible(np, "fsl,imx-audio-ac97")) { codec_dai_name = "ac97-hifi"; - priv->card.set_bias_level = NULL; priv->dai_fmt = SND_SOC_DAIFMT_AC97; priv->card.dapm_routes = audio_map_ac97; priv->card.num_dapm_routes = ARRAY_SIZE(audio_map_ac97); } else if (of_device_is_compatible(np, "fsl,imx-audio-mqs")) { codec_dai_name = "fsl-mqs-dai"; - priv->card.set_bias_level = NULL; priv->dai_fmt = SND_SOC_DAIFMT_LEFT_J | SND_SOC_DAIFMT_CBS_CFS | SND_SOC_DAIFMT_NB_NF; @@ -657,7 +644,6 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) priv->card.num_dapm_routes = ARRAY_SIZE(audio_map_tx); } else if (of_device_is_compatible(np, "fsl,imx-audio-wm8524")) { codec_dai_name = "wm8524-hifi"; - priv->card.set_bias_level = NULL; priv->dai_fmt |= SND_SOC_DAIFMT_CBS_CFS; priv->dai_link[1].dpcm_capture = 0; priv->dai_link[2].dpcm_capture = 0; From b023666e6c0165651de18cabcbb65ba14f2db153 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 3 Aug 2020 08:52:33 -0300 Subject: [PATCH 005/290] ASoC: wm8962: Do not remove ADDITIONAL_CONTROL_4 from readable register list Removing ADDITIONAL_CONTROL_4 from the list of readable registers cause audio distortion. This change was sent as a comment below the --- line when submitting commit 658bb297e393 ("ASoC: wm8962: Do not access WM8962_GPIO_BASE"), so it was not supposed to get merged. Keep WM8962_ADDITIONAL_CONTROL_4 inside wm8962_readable_register() to fix the regression. Fixes: 658bb297e393 ("ASoC: wm8962: Do not access WM8962_GPIO_BASE") Reported-by: Shengjiu Wang Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20200803115233.19034-1-festevam@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8962.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 317916cb4e27..0623a2251084 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -151,7 +151,6 @@ static const struct reg_default wm8962_reg[] = { { 40, 0x0000 }, /* R40 - SPKOUTL volume */ { 41, 0x0000 }, /* R41 - SPKOUTR volume */ - { 48, 0x0000 }, /* R48 - Additional control(4) */ { 49, 0x0010 }, /* R49 - Class D Control 1 */ { 51, 0x0003 }, /* R51 - Class D Control 2 */ @@ -842,6 +841,7 @@ static bool wm8962_readable_register(struct device *dev, unsigned int reg) case WM8962_SPKOUTL_VOLUME: case WM8962_SPKOUTR_VOLUME: case WM8962_THERMAL_SHUTDOWN_STATUS: + case WM8962_ADDITIONAL_CONTROL_4: case WM8962_CLASS_D_CONTROL_1: case WM8962_CLASS_D_CONTROL_2: case WM8962_CLOCKING_4: From ccff7bd468d5e0595176656a051ef67c01f01968 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 30 Jul 2020 20:31:38 +0800 Subject: [PATCH 006/290] ASoC: amd: renoir: restore two more registers during resume Recently we found an issue about the suspend and resume. If dmic is recording the sound, and we run suspend and resume, after the resume, the dmic can't work well anymore. we need to close the app and reopen the app, then the dmic could record the sound again. For example, we run "arecord -D hw:CARD=acp,DEV=0 -f S32_LE -c 2 -r 48000 test.wav", then suspend and resume, after the system resume back, we speak to the dmic. then stop the arecord, use aplay to play the test.wav, we could hear the sound recorded after resume is weird, it is not what we speak to the dmic. I found two registers are set in the dai_hw_params(), if the two registers are set during the resume, this issue could be fixed. Move the code of the dai_hw_params() into the pdm_dai_trigger(), then these two registers will be set during resume since pdm_dai_trigger() will be called during resume. And delete the empty function dai_hw_params(). Signed-off-by: Hui Wang Reviewed-by: Vijendar Mukunda Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200730123138.5659-1-hui.wang@canonical.com Signed-off-by: Mark Brown --- sound/soc/amd/renoir/acp3x-pdm-dma.c | 33 ++++++++++------------------ 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/sound/soc/amd/renoir/acp3x-pdm-dma.c b/sound/soc/amd/renoir/acp3x-pdm-dma.c index 623dfd3ea705..7b14d9a81b97 100644 --- a/sound/soc/amd/renoir/acp3x-pdm-dma.c +++ b/sound/soc/amd/renoir/acp3x-pdm-dma.c @@ -314,40 +314,30 @@ static int acp_pdm_dma_close(struct snd_soc_component *component, return 0; } -static int acp_pdm_dai_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, - struct snd_soc_dai *dai) -{ - struct pdm_stream_instance *rtd; - unsigned int ch_mask; - - rtd = substream->runtime->private_data; - switch (params_channels(params)) { - case TWO_CH: - ch_mask = 0x00; - break; - default: - return -EINVAL; - } - rn_writel(ch_mask, rtd->acp_base + ACP_WOV_PDM_NO_OF_CHANNELS); - rn_writel(PDM_DECIMATION_FACTOR, rtd->acp_base + - ACP_WOV_PDM_DECIMATION_FACTOR); - return 0; -} - static int acp_pdm_dai_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { struct pdm_stream_instance *rtd; int ret; bool pdm_status; + unsigned int ch_mask; rtd = substream->runtime->private_data; ret = 0; + switch (substream->runtime->channels) { + case TWO_CH: + ch_mask = 0x00; + break; + default: + return -EINVAL; + } switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + rn_writel(ch_mask, rtd->acp_base + ACP_WOV_PDM_NO_OF_CHANNELS); + rn_writel(PDM_DECIMATION_FACTOR, rtd->acp_base + + ACP_WOV_PDM_DECIMATION_FACTOR); rtd->bytescount = acp_pdm_get_byte_count(rtd, substream->stream); pdm_status = check_pdm_dma_status(rtd->acp_base); @@ -369,7 +359,6 @@ static int acp_pdm_dai_trigger(struct snd_pcm_substream *substream, } static struct snd_soc_dai_ops acp_pdm_dai_ops = { - .hw_params = acp_pdm_dai_hw_params, .trigger = acp_pdm_dai_trigger, }; From b191f01a3756f8d5d0b92bad0a07dac7e373d8be Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Aug 2020 16:18:46 +0200 Subject: [PATCH 007/290] ASoC: tegra: tegra186_dspk: Fix compile warning with CONFIG_PM=n Fix trivial compile warnings wrt unused functions by adding __maybe_unused prefix: sound/soc/tegra/tegra186_dspk.c:74:12: warning: 'tegra186_dspk_runtime_suspend' defined but not used [-Wunused-function] sound/soc/tegra/tegra186_dspk.c:86:12: warning: 'tegra186_dspk_runtime_resume' defined but not used [-Wunused-function] Fixes: 327ef6470266 ("ASoC: tegra: Add Tegra186 based DSPK driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200803141850.23713-2-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/tegra/tegra186_dspk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra186_dspk.c b/sound/soc/tegra/tegra186_dspk.c index fe7117171a0e..0cbe31e2c7e9 100644 --- a/sound/soc/tegra/tegra186_dspk.c +++ b/sound/soc/tegra/tegra186_dspk.c @@ -71,7 +71,7 @@ static int tegra186_dspk_put_control(struct snd_kcontrol *kcontrol, return 0; } -static int tegra186_dspk_runtime_suspend(struct device *dev) +static int __maybe_unused tegra186_dspk_runtime_suspend(struct device *dev) { struct tegra186_dspk *dspk = dev_get_drvdata(dev); @@ -83,7 +83,7 @@ static int tegra186_dspk_runtime_suspend(struct device *dev) return 0; } -static int tegra186_dspk_runtime_resume(struct device *dev) +static int __maybe_unused tegra186_dspk_runtime_resume(struct device *dev) { struct tegra186_dspk *dspk = dev_get_drvdata(dev); int err; From 1337f2c5f104c84d5a943b2eb644db3eaf4a64e0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Aug 2020 16:18:47 +0200 Subject: [PATCH 008/290] ASoC: tegra: tegra210_admaif: Fix compile warning with CONFIG_PM=n Fix trivial compile warnings wrt unused functions by adding __maybe_unused prefix: sound/soc/tegra/tegra210_admaif.c:232:12: warning: 'tegra_admaif_runtime_resume' defined but not used [-Wunused-function] sound/soc/tegra/tegra210_ahub.c:567:12: warning: 'tegra_ahub_runtime_suspend' defined but not used [-Wunused-function] Fixes: f74028e159bb ("ASoC: tegra: Add Tegra210 based ADMAIF driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200803141850.23713-3-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_admaif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_admaif.c b/sound/soc/tegra/tegra210_admaif.c index 4894e8e6ee7f..1268046b345d 100644 --- a/sound/soc/tegra/tegra210_admaif.c +++ b/sound/soc/tegra/tegra210_admaif.c @@ -219,7 +219,7 @@ static const struct regmap_config tegra186_admaif_regmap_config = { .cache_type = REGCACHE_FLAT, }; -static int tegra_admaif_runtime_suspend(struct device *dev) +static int __maybe_unused tegra_admaif_runtime_suspend(struct device *dev) { struct tegra_admaif *admaif = dev_get_drvdata(dev); @@ -229,7 +229,7 @@ static int tegra_admaif_runtime_suspend(struct device *dev) return 0; } -static int tegra_admaif_runtime_resume(struct device *dev) +static int __maybe_unused tegra_admaif_runtime_resume(struct device *dev) { struct tegra_admaif *admaif = dev_get_drvdata(dev); From fafac559604bd2e74f5f6febd58682df3738cdd9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Aug 2020 16:18:48 +0200 Subject: [PATCH 009/290] ASoC: tegra: tegra210_ahub: Fix compile warning with CONFIG_PM=n Fix trivial compile warnings wrt unused functions by adding __maybe_unused prefix: sound/soc/tegra/tegra210_ahub.c:567:12: warning: 'tegra_ahub_runtime_suspend' defined but not used [-Wunused-function] sound/soc/tegra/tegra210_ahub.c:579:12: warning: 'tegra_ahub_runtime_resume' defined but not used [-Wunused-function] Fixes: 16e1bcc2caf4 ("ASoC: tegra: Add Tegra210 based AHUB driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200803141850.23713-4-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_ahub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_ahub.c b/sound/soc/tegra/tegra210_ahub.c index 5123a96fdde8..66287a7c9865 100644 --- a/sound/soc/tegra/tegra210_ahub.c +++ b/sound/soc/tegra/tegra210_ahub.c @@ -564,7 +564,7 @@ static const struct of_device_id tegra_ahub_of_match[] = { }; MODULE_DEVICE_TABLE(of, tegra_ahub_of_match); -static int tegra_ahub_runtime_suspend(struct device *dev) +static int __maybe_unused tegra_ahub_runtime_suspend(struct device *dev) { struct tegra_ahub *ahub = dev_get_drvdata(dev); @@ -576,7 +576,7 @@ static int tegra_ahub_runtime_suspend(struct device *dev) return 0; } -static int tegra_ahub_runtime_resume(struct device *dev) +static int __maybe_unused tegra_ahub_runtime_resume(struct device *dev) { struct tegra_ahub *ahub = dev_get_drvdata(dev); int err; From 7543f16a04465db95e83e8409e246f49f35c874a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Aug 2020 16:18:49 +0200 Subject: [PATCH 010/290] ASoC: tegra: tegra210_dmic: Fix compile warning with CONFIG_PM=n Fix trivial compile warnings wrt unused functions by adding __maybe_unused prefix: sound/soc/tegra/tegra210_dmic.c:43:12: warning: 'tegra210_dmic_runtime_suspend' defined but not used [-Wunused-function] sound/soc/tegra/tegra210_dmic.c:55:12: warning: 'tegra210_dmic_runtime_resume' defined but not used [-Wunused-function] Fixes: 8c8ff982e9e2 ("ASoC: tegra: Add Tegra210 based DMIC driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200803141850.23713-5-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_dmic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_dmic.c b/sound/soc/tegra/tegra210_dmic.c index d682414ad90d..a661f40bc41c 100644 --- a/sound/soc/tegra/tegra210_dmic.c +++ b/sound/soc/tegra/tegra210_dmic.c @@ -40,7 +40,7 @@ static const struct reg_default tegra210_dmic_reg_defaults[] = { { TEGRA210_DMIC_LP_BIQUAD_1_COEF_4, 0x0 }, }; -static int tegra210_dmic_runtime_suspend(struct device *dev) +static int __maybe_unused tegra210_dmic_runtime_suspend(struct device *dev) { struct tegra210_dmic *dmic = dev_get_drvdata(dev); @@ -52,7 +52,7 @@ static int tegra210_dmic_runtime_suspend(struct device *dev) return 0; } -static int tegra210_dmic_runtime_resume(struct device *dev) +static int __maybe_unused tegra210_dmic_runtime_resume(struct device *dev) { struct tegra210_dmic *dmic = dev_get_drvdata(dev); int err; From 823279c374669a15a139a29b891dc0d7460262c6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Aug 2020 16:18:50 +0200 Subject: [PATCH 011/290] ASoC: tegra: tegra210_i2s: Fix compile warning with CONFIG_PM=n Fix trivial compile warnings wrt unused functions by adding __maybe_unused prefix: sound/soc/tegra/tegra210_i2s.c:167:12: warning: 'tegra210_i2s_runtime_suspend' defined but not used [-Wunused-function] sound/soc/tegra/tegra210_i2s.c:179:12: warning: 'tegra210_i2s_runtime_resume' defined but not used [-Wunused-function] Fixes: c0bfa98349d1 ("ASoC: tegra: Add Tegra210 based I2S driver") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200803141850.23713-6-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_i2s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_i2s.c b/sound/soc/tegra/tegra210_i2s.c index 722092181583..a383bd5c51cd 100644 --- a/sound/soc/tegra/tegra210_i2s.c +++ b/sound/soc/tegra/tegra210_i2s.c @@ -164,7 +164,7 @@ static int tegra210_i2s_init(struct snd_soc_dapm_widget *w, return tegra210_i2s_sw_reset(compnt, is_playback); } -static int tegra210_i2s_runtime_suspend(struct device *dev) +static int __maybe_unused tegra210_i2s_runtime_suspend(struct device *dev) { struct tegra210_i2s *i2s = dev_get_drvdata(dev); @@ -176,7 +176,7 @@ static int tegra210_i2s_runtime_suspend(struct device *dev) return 0; } -static int tegra210_i2s_runtime_resume(struct device *dev) +static int __maybe_unused tegra210_i2s_runtime_resume(struct device *dev) { struct tegra210_i2s *i2s = dev_get_drvdata(dev); int err; From 9493755d7c1156b00b58376752d4c3df7c0a01ec Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 3 Aug 2020 16:46:30 +0200 Subject: [PATCH 012/290] ASoC: fsl: Fix unused variable warning The variable rtd was left unused in psc_dma_free(), even unnoticed during conversion to a new style: sound/soc/fsl/mpc5200_dma.c:342:30: warning: unused variable 'rtd' [-Wunused-variable] Drop the superfluous one. Fixes: 6d1048bc1152 ("ASoC: fsl: mpc5200_dma: remove snd_pcm_ops") Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200803144630.9615-1-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/fsl/mpc5200_dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c index 9e4f66b6b92b..231984882176 100644 --- a/sound/soc/fsl/mpc5200_dma.c +++ b/sound/soc/fsl/mpc5200_dma.c @@ -339,7 +339,6 @@ static int psc_dma_new(struct snd_soc_component *component, static void psc_dma_free(struct snd_soc_component *component, struct snd_pcm *pcm) { - struct snd_soc_pcm_runtime *rtd = pcm->private_data; struct snd_pcm_substream *substream; int stream; From ddf75be47ca748f8b12d28ac64d624354fddf189 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 3 Aug 2020 13:09:01 +0200 Subject: [PATCH 013/290] spi: Prevent adding devices below an unregistering controller CONFIG_OF_DYNAMIC and CONFIG_ACPI allow adding SPI devices at runtime using a DeviceTree overlay or DSDT patch. CONFIG_SPI_SLAVE allows the same via sysfs. But there are no precautions to prevent adding a device below a controller that's being removed. Such a device is unusable and may not even be able to unbind cleanly as it becomes inaccessible once the controller has been torn down. E.g. it is then impossible to quiesce the device's interrupt. of_spi_notify() and acpi_spi_notify() do hold a ref on the controller, but otherwise run lockless against spi_unregister_controller(). Fix by holding the spi_add_lock in spi_unregister_controller() and bailing out of spi_add_device() if the controller has been unregistered concurrently. Fixes: ce79d54ae447 ("spi/of: Add OF notifier handler") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v3.19+ Cc: Geert Uytterhoeven Cc: Octavian Purdila Cc: Pantelis Antoniou Link: https://lore.kernel.org/r/a8c3205088a969dc8410eec1eba9aface60f36af.1596451035.git.lukas@wunner.de Signed-off-by: Mark Brown --- drivers/spi/Kconfig | 3 +++ drivers/spi/spi.c | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index c3008e423f59..c6ea760ea5f0 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -1017,4 +1017,7 @@ config SPI_SLAVE_SYSTEM_CONTROL endif # SPI_SLAVE +config SPI_DYNAMIC + def_bool ACPI || OF_DYNAMIC || SPI_SLAVE + endif # SPI diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 0b260484b4f5..92b8fb416dca 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -475,6 +475,12 @@ static LIST_HEAD(spi_controller_list); */ static DEFINE_MUTEX(board_lock); +/* + * Prevents addition of devices with same chip select and + * addition of devices below an unregistering controller. + */ +static DEFINE_MUTEX(spi_add_lock); + /** * spi_alloc_device - Allocate a new SPI device * @ctlr: Controller to which device is connected @@ -554,7 +560,6 @@ static int spi_dev_check(struct device *dev, void *data) */ int spi_add_device(struct spi_device *spi) { - static DEFINE_MUTEX(spi_add_lock); struct spi_controller *ctlr = spi->controller; struct device *dev = ctlr->dev.parent; int status; @@ -582,6 +587,13 @@ int spi_add_device(struct spi_device *spi) goto done; } + /* Controller may unregister concurrently */ + if (IS_ENABLED(CONFIG_SPI_DYNAMIC) && + !device_is_registered(&ctlr->dev)) { + status = -ENODEV; + goto done; + } + /* Descriptors take precedence */ if (ctlr->cs_gpiods) spi->cs_gpiod = ctlr->cs_gpiods[spi->chip_select]; @@ -2797,6 +2809,10 @@ void spi_unregister_controller(struct spi_controller *ctlr) struct spi_controller *found; int id = ctlr->bus_num; + /* Prevent addition of new devices, unregister existing ones */ + if (IS_ENABLED(CONFIG_SPI_DYNAMIC)) + mutex_lock(&spi_add_lock); + device_for_each_child(&ctlr->dev, NULL, __unregister); /* First make sure that this controller was ever added */ @@ -2817,6 +2833,9 @@ void spi_unregister_controller(struct spi_controller *ctlr) if (found == ctlr) idr_remove(&spi_master_idr, id); mutex_unlock(&board_lock); + + if (IS_ENABLED(CONFIG_SPI_DYNAMIC)) + mutex_unlock(&spi_add_lock); } EXPORT_SYMBOL_GPL(spi_unregister_controller); From ae1ba50f1e706dfd7ce402ac52c1f1f10becad68 Mon Sep 17 00:00:00 2001 From: Tobias Schramm Date: Tue, 4 Aug 2020 21:51:36 +0200 Subject: [PATCH 014/290] spi: stm32: clear only asserted irq flags on interrupt Previously the stm32h7 interrupt thread cleared all non-masked interrupts. If an interrupt was to occur during the handling of another interrupt its flag would be unset, resulting in a lost interrupt. This patches fixes the issue by clearing only the currently set interrupt flags. Signed-off-by: Tobias Schramm Link: https://lore.kernel.org/r/20200804195136.1485392-1-t.schramm@manjaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 4c643dfc7fbb..4a21feae0103 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -966,7 +966,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0))) stm32h7_spi_read_rxfifo(spi, false); - writel_relaxed(mask, spi->base + STM32H7_SPI_IFCR); + writel_relaxed(sr & mask, spi->base + STM32H7_SPI_IFCR); spin_unlock_irqrestore(&spi->lock, flags); From e0f49d270d9d04beec113aa7cfacac6bddcb5765 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 13 Jun 2020 19:12:24 +0200 Subject: [PATCH 015/290] ext4: delete unnecessary checks before brelse() The brelse() function tests whether its argument is NULL and then returns immediately. Thus remove the tests which are not needed around the shown calls. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/0d713702-072f-a89c-20ec-ca70aa83a432@web.de Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 6 ++---- fs/ext4/xattr.c | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1106ee17aa21..4af3f36c8351 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -693,10 +693,8 @@ void ext4_ext_drop_refs(struct ext4_ext_path *path) return; depth = path->p_depth; for (i = 0; i <= depth; i++, path++) { - if (path->p_bh) { - brelse(path->p_bh); - path->p_bh = NULL; - } + brelse(path->p_bh); + path->p_bh = NULL; } } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 7d2f6576d954..cba4b877c606 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1356,8 +1356,7 @@ static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode, block = 0; while (wsize < bufsize) { - if (bh != NULL) - brelse(bh); + brelse(bh); csize = (bufsize - wsize) > blocksize ? blocksize : bufsize - wsize; bh = ext4_getblk(handle, ea_inode, block, 0); From e030a28810daf48b43ef6e92fa502ae410b12c8c Mon Sep 17 00:00:00 2001 From: Dio Putra Date: Sun, 14 Jun 2020 11:45:44 +0700 Subject: [PATCH 016/290] ext4: fix coding style in file.c Fixed a few coding style issues in file.c Signed-off-by: Dio Putra Link: https://lore.kernel.org/r/239fcd8f-d33f-8621-9e82-0416dd3f9c94@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/file.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2a01e31a032c..453cafecf5cc 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -145,8 +145,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1) && - !EXT4_I(inode)->i_reserved_data_blocks) - { + !EXT4_I(inode)->i_reserved_data_blocks) { down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); @@ -810,7 +809,7 @@ static int ext4_sample_last_mounted(struct super_block *sb, return err; } -static int ext4_file_open(struct inode * inode, struct file * filp) +static int ext4_file_open(struct inode *inode, struct file *filp) { int ret; From 24dc9864914eb5813173cfa53313fcd02e4aea7d Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 17 Jun 2020 11:25:49 +0200 Subject: [PATCH 017/290] jbd2: make sure jh have b_transaction set in refile/unfile_buffer Callers of __jbd2_journal_unfile_buffer() and __jbd2_journal_refile_buffer() assume that the b_transaction is set. In fact if it's not, we can end up with journal_head refcounting errors leading to crash much later that might be very hard to track down. Add asserts to make sure that is the case. We also make sure that b_next_transaction is NULL in __jbd2_journal_unfile_buffer() since the callers expect that as well and we should not get into that stage in this state anyway, leading to problems later on if we do. Tested with fstests. Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200617092549.6712-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e91aad3637a2..e65e0aca2826 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2026,6 +2026,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) */ static void __jbd2_journal_unfile_buffer(struct journal_head *jh) { + J_ASSERT_JH(jh, jh->b_transaction != NULL); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; } @@ -2572,6 +2575,13 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh) was_dirty = test_clear_buffer_jbddirty(bh); __jbd2_journal_temp_unlink_buffer(jh); + + /* + * b_transaction must be set, otherwise the new b_transaction won't + * be holding jh reference + */ + J_ASSERT_JH(jh, jh->b_transaction != NULL); + /* * We set b_transaction here because b_next_transaction will inherit * our jh reference and thus __jbd2_journal_file_buffer() must not From 5872331b3d91820e14716632ebb56b1399b34fe1 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 17 Jun 2020 14:19:04 -0500 Subject: [PATCH 018/290] ext4: fix potential negative array index in do_split() If for any reason a directory passed to do_split() does not have enough active entries to exceed half the size of the block, we can end up iterating over all "count" entries without finding a split point. In this case, count == move, and split will be zero, and we will attempt a negative index into map[]. Guard against this by detecting this case, and falling back to split-to-half-of-count instead; in this case we will still have plenty of space (> half blocksize) in each split block. Fixes: ef2b02d3e617 ("ext34: ensure do_split leaves enough free space in both blocks") Signed-off-by: Eric Sandeen Reviewed-by: Andreas Dilger Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/f53e246b-647c-64bb-16ec-135383c70ad7@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 56738b538ddf..ef606301a106 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1858,7 +1858,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, blocksize, hinfo, map); map -= count; dx_sort_map(map, count); - /* Split the existing block in the middle, size-wise */ + /* Ensure that neither split block is over half full */ size = 0; move = 0; for (i = count-1; i >= 0; i--) { @@ -1868,8 +1868,18 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, size += map[i].size; move++; } - /* map index at which we will split */ - split = count - move; + /* + * map index at which we will split + * + * If the sum of active entries didn't exceed half the block size, just + * split it in half by count; each resulting block will have at least + * half the space free. + */ + if (i > 0) + split = count - move; + else + split = count/2; + hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n", From ef3f5830b859604eda8723c26d90ab23edc027a4 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sat, 20 Jun 2020 14:19:48 +0800 Subject: [PATCH 019/290] jbd2: add the missing unlock_buffer() in the error path of jbd2_write_superblock() jbd2_write_superblock() is under the buffer lock of journal superblock before ending that superblock write, so add a missing unlock_buffer() in in the error path before submitting buffer. Fixes: 742b06b5628f ("jbd2: check superblock mapped prior to committing") Signed-off-by: zhangyi (F) Reviewed-by: Ritesh Harjani Cc: stable@kernel.org Link: https://lore.kernel.org/r/20200620061948.2049579-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e4944436e733..5493a0da23dd 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1367,8 +1367,10 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) int ret; /* Buffer got discarded which means block device got invalidated */ - if (!buffer_mapped(bh)) + if (!buffer_mapped(bh)) { + unlock_buffer(bh); return -EIO; + } trace_jbd2_write_superblock(journal, write_flags); if (!(journal->j_flags & JBD2_BARRIER)) From 9a5d265fed014115f35e598022c956e5d2fb863e Mon Sep 17 00:00:00 2001 From: zhengliang Date: Wed, 1 Jul 2020 16:30:27 +0800 Subject: [PATCH 020/290] ext4: lost matching-pair of trace in ext4_truncate It should call trace exit in all return path for ext4_truncate. Signed-off-by: zhengliang Reviewed-by: Andreas Dilger Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/20200701083027.45996-1-zhengliang6@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 10dd470876b3..6187c8880c02 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4163,7 +4163,7 @@ int ext4_truncate(struct inode *inode) trace_ext4_truncate_enter(inode); if (!ext4_can_truncate(inode)) - return 0; + goto out_trace; if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); @@ -4172,16 +4172,14 @@ int ext4_truncate(struct inode *inode) int has_inline = 1; err = ext4_inline_data_truncate(inode, &has_inline); - if (err) - return err; - if (has_inline) - return 0; + if (err || has_inline) + goto out_trace; } /* If we zero-out tail of the page, we have to create jinode for jbd2 */ if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { if (ext4_inode_attach_jinode(inode) < 0) - return 0; + goto out_trace; } if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) @@ -4190,8 +4188,10 @@ int ext4_truncate(struct inode *inode) credits = ext4_blocks_for_truncate(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out_trace; + } if (inode->i_size & (inode->i_sb->s_blocksize - 1)) ext4_block_truncate_page(handle, mapping, inode->i_size); @@ -4242,6 +4242,7 @@ int ext4_truncate(struct inode *inode) err = err2; ext4_journal_stop(handle); +out_trace: trace_ext4_truncate_exit(inode); return err; } From e5f78159d62f79eb7923561275290b7c95e7f03f Mon Sep 17 00:00:00 2001 From: Yi Zhuang Date: Mon, 29 Jun 2020 20:26:21 +0800 Subject: [PATCH 021/290] ext4: lost matching-pair of trace in ext4_unlink If dquot_initialize() return non-zero and trace of ext4_unlink_enter/exit enabled then the matching-pair of trace_exit will lost in log. Signed-off-by: Yi Zhuang Reviewed-by: Andreas Dilger Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/20200629122621.129953-1-zhuangyi1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ef606301a106..cb2eb1967e73 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3203,30 +3203,33 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) * in separate transaction */ retval = dquot_initialize(dir); if (retval) - return retval; + goto out_trace; retval = dquot_initialize(d_inode(dentry)); if (retval) - return retval; + goto out_trace; - retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); - if (IS_ERR(bh)) - return PTR_ERR(bh); - if (!bh) - goto end_unlink; + if (IS_ERR(bh)) { + retval = PTR_ERR(bh); + goto out_trace; + } + if (!bh) { + retval = -ENOENT; + goto out_trace; + } inode = d_inode(dentry); - retval = -EFSCORRUPTED; - if (le32_to_cpu(de->inode) != inode->i_ino) - goto end_unlink; + if (le32_to_cpu(de->inode) != inode->i_ino) { + retval = -EFSCORRUPTED; + goto out_bh; + } handle = ext4_journal_start(dir, EXT4_HT_DIR, EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); - handle = NULL; - goto end_unlink; + goto out_bh; } if (IS_DIRSYNC(dir)) @@ -3234,12 +3237,12 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) retval = ext4_delete_entry(handle, dir, de, bh); if (retval) - goto end_unlink; + goto out_handle; dir->i_ctime = dir->i_mtime = current_time(dir); ext4_update_dx_flag(dir); retval = ext4_mark_inode_dirty(handle, dir); if (retval) - goto end_unlink; + goto out_handle; if (inode->i_nlink == 0) ext4_warning_inode(inode, "Deleting file '%.*s' with no links", dentry->d_name.len, dentry->d_name.name); @@ -3261,10 +3264,11 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) d_invalidate(dentry); #endif -end_unlink: +out_handle: + ext4_journal_stop(handle); +out_bh: brelse(bh); - if (handle) - ext4_journal_stop(handle); +out_trace: trace_ext4_unlink_exit(dentry, retval); return retval; } From e65bf6e468c39a60c1dfae439198ad875bb8a870 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 6 Jul 2020 21:03:39 +0200 Subject: [PATCH 022/290] ext4: replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Link: https://lore.kernel.org/r/20200706190339.20709-1-grandmaster@al2klimov.de Signed-off-by: Theodore Ts'o --- Documentation/filesystems/ext4/about.rst | 2 +- fs/ext4/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/ext4/about.rst b/Documentation/filesystems/ext4/about.rst index 0aadba052264..cc76b577d2f4 100644 --- a/Documentation/filesystems/ext4/about.rst +++ b/Documentation/filesystems/ext4/about.rst @@ -39,6 +39,6 @@ entry. Other References ---------------- -Also see http://www.nongnu.org/ext2-doc/ for quite a collection of +Also see https://www.nongnu.org/ext2-doc/ for quite a collection of information about ext2/3. Here's another old reference: http://wiki.osdev.org/Ext2 diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 1afa5a4bcb5f..619dd35ddd48 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -110,7 +110,7 @@ config EXT4_KUNIT_TESTS This builds the ext4 KUnit tests. KUnit tests run during boot and output the results to the debug log - in TAP format (http://testanything.org/). Only useful for kernel devs + in TAP format (https://testanything.org/). Only useful for kernel devs running KUnit test harness and are not for inclusion into a production build. From 0b3171b6d195637f84ddf8b59bae818ea20bc8ac Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Jul 2020 17:35:16 +0200 Subject: [PATCH 023/290] ext4: do not block RWF_NOWAIT dio write on unallocated space Since commit 378f32bab371 ("ext4: introduce direct I/O write using iomap infrastructure") we don't properly bail out of RWF_NOWAIT direct IO write if underlying blocks are not allocated. Also ext4_dio_write_checks() does not honor RWF_NOWAIT when re-acquiring i_rwsem. Fix both issues. Fixes: 378f32bab371 ("ext4: introduce direct I/O write using iomap infrastructure") Cc: stable@kernel.org Reported-by: Filipe Manana Signed-off-by: Jan Kara Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/20200708153516.9507-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 453cafecf5cc..7a2720517bbb 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -427,6 +427,10 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, */ if (*ilock_shared && (!IS_NOSEC(inode) || *extend || !ext4_overwrite_io(inode, offset, count))) { + if (iocb->ki_flags & IOCB_NOWAIT) { + ret = -EAGAIN; + goto out; + } inode_unlock_shared(inode); *ilock_shared = false; inode_lock(inode); From 11215630aada28307ba555a43138db6ac54fa825 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 10 Jul 2020 16:07:59 +0200 Subject: [PATCH 024/290] ext4: don't BUG on inconsistent journal feature A customer has reported a BUG_ON in ext4_clear_journal_err() hitting during an LTP testing. Either this has been caused by a test setup issue where the filesystem was being overwritten while LTP was mounting it or the journal replay has overwritten the superblock with invalid data. In either case it is preferable we don't take the machine down with a BUG_ON. So handle the situation of unexpectedly missing has_journal feature more gracefully. We issue warning and fail the mount in the cases where the race window is narrow and the failed check is most likely a programming error. In cases where fs corruption is more likely, we do full ext4_error() handling before failing mount / remount. Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200710140759.18031-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 68 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 330957ed1f05..9fdad843b30e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -66,10 +66,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_show_options(struct seq_file *seq, struct dentry *root); static int ext4_commit_super(struct super_block *sb, int sync); -static void ext4_mark_recovery_complete(struct super_block *sb, +static int ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); -static void ext4_clear_journal_err(struct super_block *sb, - struct ext4_super_block *es); +static int ext4_clear_journal_err(struct super_block *sb, + struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); static int ext4_remount(struct super_block *sb, int *flags, char *data); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); @@ -4770,7 +4770,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; if (needs_recovery) { ext4_msg(sb, KERN_INFO, "recovery complete"); - ext4_mark_recovery_complete(sb, es); + err = ext4_mark_recovery_complete(sb, es); + if (err) + goto failed_mount8; } if (EXT4_SB(sb)->s_journal) { if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) @@ -4813,10 +4815,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto failed_mount; -#ifdef CONFIG_QUOTA failed_mount8: ext4_unregister_sysfs(sb); -#endif failed_mount7: ext4_unregister_li_request(sb); failed_mount6: @@ -4956,7 +4956,8 @@ static journal_t *ext4_get_journal(struct super_block *sb, struct inode *journal_inode; journal_t *journal; - BUG_ON(!ext4_has_feature_journal(sb)); + if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) + return NULL; journal_inode = ext4_get_journal_inode(sb, journal_inum); if (!journal_inode) @@ -4986,7 +4987,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, struct ext4_super_block *es; struct block_device *bdev; - BUG_ON(!ext4_has_feature_journal(sb)); + if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) + return NULL; bdev = ext4_blkdev_get(j_dev, sb); if (bdev == NULL) @@ -5078,7 +5080,8 @@ static int ext4_load_journal(struct super_block *sb, int err = 0; int really_read_only; - BUG_ON(!ext4_has_feature_journal(sb)); + if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) + return -EFSCORRUPTED; if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { @@ -5148,7 +5151,12 @@ static int ext4_load_journal(struct super_block *sb, } EXT4_SB(sb)->s_journal = journal; - ext4_clear_journal_err(sb, es); + err = ext4_clear_journal_err(sb, es); + if (err) { + EXT4_SB(sb)->s_journal = NULL; + jbd2_journal_destroy(journal); + return err; + } if (!really_read_only && journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { @@ -5244,26 +5252,32 @@ static int ext4_commit_super(struct super_block *sb, int sync) * remounting) the filesystem readonly, then we will end up with a * consistent fs on disk. Record that fact. */ -static void ext4_mark_recovery_complete(struct super_block *sb, - struct ext4_super_block *es) +static int ext4_mark_recovery_complete(struct super_block *sb, + struct ext4_super_block *es) { + int err; journal_t *journal = EXT4_SB(sb)->s_journal; if (!ext4_has_feature_journal(sb)) { - BUG_ON(journal != NULL); - return; + if (journal != NULL) { + ext4_error(sb, "Journal got removed while the fs was " + "mounted!"); + return -EFSCORRUPTED; + } + return 0; } jbd2_journal_lock_updates(journal); - if (jbd2_journal_flush(journal) < 0) + err = jbd2_journal_flush(journal); + if (err < 0) goto out; if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) { ext4_clear_feature_journal_needs_recovery(sb); ext4_commit_super(sb, 1); } - out: jbd2_journal_unlock_updates(journal); + return err; } /* @@ -5271,14 +5285,17 @@ static void ext4_mark_recovery_complete(struct super_block *sb, * has recorded an error from a previous lifetime, move that error to the * main filesystem now. */ -static void ext4_clear_journal_err(struct super_block *sb, +static int ext4_clear_journal_err(struct super_block *sb, struct ext4_super_block *es) { journal_t *journal; int j_errno; const char *errstr; - BUG_ON(!ext4_has_feature_journal(sb)); + if (!ext4_has_feature_journal(sb)) { + ext4_error(sb, "Journal got removed while the fs was mounted!"); + return -EFSCORRUPTED; + } journal = EXT4_SB(sb)->s_journal; @@ -5303,6 +5320,7 @@ static void ext4_clear_journal_err(struct super_block *sb, jbd2_journal_clear_err(journal); jbd2_journal_update_sb_errno(journal); } + return 0; } /* @@ -5573,8 +5591,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) (sbi->s_mount_state & EXT4_VALID_FS)) es->s_state = cpu_to_le16(sbi->s_mount_state); - if (sbi->s_journal) + if (sbi->s_journal) { + /* + * We let remount-ro finish even if marking fs + * as clean failed... + */ ext4_mark_recovery_complete(sb, es); + } if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); } else { @@ -5622,8 +5645,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * been changed by e2fsck since we originally mounted * the partition.) */ - if (sbi->s_journal) - ext4_clear_journal_err(sb, es); + if (sbi->s_journal) { + err = ext4_clear_journal_err(sb, es); + if (err) + goto restore_opts; + } sbi->s_mount_state = le16_to_cpu(es->s_state); err = ext4_setup_super(sb, es, 0); From 2a12e147da38807f8aab03c92de99a70ab983a96 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 12 Jul 2020 20:10:12 -0700 Subject: [PATCH 025/290] ext4: don't hardcode bit values in EXT4_FL_USER_* Define the EXT4_FL_USER_* constants by OR-ing together the appropriate flags, rather than hard-coding a numeric value. This makes it much easier to see which flags are listed. No change in the actual values. Signed-off-by: Eric Biggers Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200713031012.192440-1-ebiggers@kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 42f5060f3cdf..b603a28a3696 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -434,8 +434,34 @@ struct flex_groups { #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */ +/* User modifiable flags */ +#define EXT4_FL_USER_MODIFIABLE (EXT4_SECRM_FL | \ + EXT4_UNRM_FL | \ + EXT4_COMPR_FL | \ + EXT4_SYNC_FL | \ + EXT4_IMMUTABLE_FL | \ + EXT4_APPEND_FL | \ + EXT4_NODUMP_FL | \ + EXT4_NOATIME_FL | \ + EXT4_JOURNAL_DATA_FL | \ + EXT4_NOTAIL_FL | \ + EXT4_DIRSYNC_FL | \ + EXT4_TOPDIR_FL | \ + EXT4_EXTENTS_FL | \ + 0x00400000 /* EXT4_EOFBLOCKS_FL */ | \ + EXT4_DAX_FL | \ + EXT4_PROJINHERIT_FL | \ + EXT4_CASEFOLD_FL) + +/* User visible flags */ +#define EXT4_FL_USER_VISIBLE (EXT4_FL_USER_MODIFIABLE | \ + EXT4_DIRTY_FL | \ + EXT4_COMPRBLK_FL | \ + EXT4_NOCOMPR_FL | \ + EXT4_ENCRYPT_FL | \ + EXT4_INDEX_FL | \ + EXT4_VERITY_FL | \ + EXT4_INLINE_DATA_FL) /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \ From cb29a02d3a9d51c749f556b3bbf6551fbc0454eb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 14 Jul 2020 16:09:09 -0700 Subject: [PATCH 026/290] ext4: use generic names for generic ioctls Don't define EXT4_IOC_* aliases to ioctls that already have a generic FS_IOC_* name. These aliases are unnecessary, and they make it unclear which ioctls are ext4-specific and which are generic. Exception: leave EXT4_IOC_GETVERSION_OLD and EXT4_IOC_SETVERSION_OLD as-is for now, since renaming them to FS_IOC_GETVERSION and FS_IOC_SETVERSION would probably make them more likely to be confused with EXT4_IOC_GETVERSION and EXT4_IOC_SETVERSION which also exist. Signed-off-by: Eric Biggers Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200714230909.56349-1-ebiggers@kernel.org Signed-off-by: Theodore Ts'o --- Documentation/admin-guide/ext4.rst | 20 +++++++++---------- fs/ext4/ext4.h | 12 +---------- fs/ext4/ioctl.c | 32 +++++++++++++++--------------- 3 files changed, 27 insertions(+), 37 deletions(-) diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst index 9443fcef1876..7fc6a72920c9 100644 --- a/Documentation/admin-guide/ext4.rst +++ b/Documentation/admin-guide/ext4.rst @@ -522,21 +522,21 @@ Files in /sys/fs/ext4/: Ioctls ====== -There is some Ext4 specific functionality which can be accessed by applications -through the system call interfaces. The list of all Ext4 specific ioctls are -shown in the table below. +Ext4 implements various ioctls which can be used by applications to access +ext4-specific functionality. An incomplete list of these ioctls is shown in the +table below. This list includes truly ext4-specific ioctls (``EXT4_IOC_*``) as +well as ioctls that may have been ext4-specific originally but are now supported +by some other filesystem(s) too (``FS_IOC_*``). -Table of Ext4 specific ioctls +Table of Ext4 ioctls - EXT4_IOC_GETFLAGS + FS_IOC_GETFLAGS Get additional attributes associated with inode. The ioctl argument is - an integer bitfield, with bit values described in ext4.h. This ioctl is - an alias for FS_IOC_GETFLAGS. + an integer bitfield, with bit values described in ext4.h. - EXT4_IOC_SETFLAGS + FS_IOC_SETFLAGS Set additional attributes associated with inode. The ioctl argument is - an integer bitfield, with bit values described in ext4.h. This ioctl is - an alias for FS_IOC_SETFLAGS. + an integer bitfield, with bit values described in ext4.h. EXT4_IOC_GETVERSION, EXT4_IOC_GETVERSION_OLD Get the inode i_generation number stored for each inode. The diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b603a28a3696..dd5e6c645442 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -463,7 +463,7 @@ struct flex_groups { EXT4_VERITY_FL | \ EXT4_INLINE_DATA_FL) -/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ +/* Flags we can manipulate with through FS_IOC_FSSETXATTR */ #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \ EXT4_IMMUTABLE_FL | \ EXT4_APPEND_FL | \ @@ -695,8 +695,6 @@ enum { /* * ioctl commands */ -#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS -#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS #define EXT4_IOC_GETVERSION _IOR('f', 3, long) #define EXT4_IOC_SETVERSION _IOW('f', 4, long) #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION @@ -713,17 +711,11 @@ enum { #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) #define EXT4_IOC_SWAP_BOOT _IO('f', 17) #define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) -#define EXT4_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY -#define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT -#define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY /* ioctl codes 19--39 are reserved for fscrypt */ #define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40) #define EXT4_IOC_GETSTATE _IOW('f', 41, __u32) #define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap) -#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR -#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR - #define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32) /* @@ -748,8 +740,6 @@ enum { /* * ioctl commands in 32 bit emulation */ -#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS #define EXT4_IOC32_GETVERSION _IOR('f', 3, int) #define EXT4_IOC32_SETVERSION _IOW('f', 4, int) #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 999cf6add39c..6e70a63dcca7 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -819,12 +819,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case FS_IOC_GETFSMAP: return ext4_ioc_getfsmap(sb, (void __user *)arg); - case EXT4_IOC_GETFLAGS: + case FS_IOC_GETFLAGS: flags = ei->i_flags & EXT4_FL_USER_VISIBLE; if (S_ISREG(inode->i_mode)) flags &= ~EXT4_PROJINHERIT_FL; return put_user(flags, (int __user *) arg); - case EXT4_IOC_SETFLAGS: { + case FS_IOC_SETFLAGS: { int err; if (!inode_owner_or_capable(inode)) @@ -1129,12 +1129,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case EXT4_IOC_PRECACHE_EXTENTS: return ext4_ext_precache(inode); - case EXT4_IOC_SET_ENCRYPTION_POLICY: + case FS_IOC_SET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); - case EXT4_IOC_GET_ENCRYPTION_PWSALT: { + case FS_IOC_GET_ENCRYPTION_PWSALT: { #ifdef CONFIG_FS_ENCRYPTION int err, err2; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1174,7 +1174,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EOPNOTSUPP; #endif } - case EXT4_IOC_GET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); @@ -1236,7 +1236,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case EXT4_IOC_GET_ES_CACHE: return ext4_ioctl_get_es_cache(filp, arg); - case EXT4_IOC_FSGETXATTR: + case FS_IOC_FSGETXATTR: { struct fsxattr fa; @@ -1247,7 +1247,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EFAULT; return 0; } - case EXT4_IOC_FSSETXATTR: + case FS_IOC_FSSETXATTR: { struct fsxattr fa, old_fa; int err; @@ -1313,11 +1313,11 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { - case EXT4_IOC32_GETFLAGS: - cmd = EXT4_IOC_GETFLAGS; + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; break; - case EXT4_IOC32_SETFLAGS: - cmd = EXT4_IOC_SETFLAGS; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; break; case EXT4_IOC32_GETVERSION: cmd = EXT4_IOC_GETVERSION; @@ -1361,9 +1361,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC_RESIZE_FS: case FITRIM: case EXT4_IOC_PRECACHE_EXTENTS: - case EXT4_IOC_SET_ENCRYPTION_POLICY: - case EXT4_IOC_GET_ENCRYPTION_PWSALT: - case EXT4_IOC_GET_ENCRYPTION_POLICY: + case FS_IOC_SET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_PWSALT: + case FS_IOC_GET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY_EX: case FS_IOC_ADD_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY: @@ -1377,8 +1377,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC_CLEAR_ES_CACHE: case EXT4_IOC_GETSTATE: case EXT4_IOC_GET_ES_CACHE: - case EXT4_IOC_FSGETXATTR: - case EXT4_IOC_FSSETXATTR: + case FS_IOC_FSGETXATTR: + case FS_IOC_FSSETXATTR: break; default: return -ENOIOCTLCMD; From 3cb77bd241a48c54bca7bd8510516939264944c5 Mon Sep 17 00:00:00 2001 From: brookxu Date: Wed, 15 Jul 2020 11:00:44 +0800 Subject: [PATCH 027/290] ext4: fix spelling typos in ext4_mb_initialize_context Fix spelling typos in ext4_mb_initialize_context. Signed-off-by: Chunguang Xu Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/883b523c-58ec-7f38-0bb8-cd2ea4393684@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c0a331e2feb0..6dc2c6c535ef 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4399,7 +4399,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ac->ac_g_ex = ac->ac_o_ex; ac->ac_flags = ar->flags; - /* we have to define context: we'll we work with a file or + /* we have to define context: we'll work with a file or * locality group. this is a policy, actually */ ext4_mb_group_or_file(ac); From 273108fa5015eeffc4bacfa5ce272af3434b96e4 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 17 Jul 2020 11:06:05 +0200 Subject: [PATCH 028/290] ext4: handle read only external journal device Ext4 uses blkdev_get_by_dev() to get the block_device for journal device which does check to see if the read-only block device was opened read-only. As a result ext4 will hapily proceed mounting the file system with external journal on read-only device. This is bad as we would not be able to use the journal leading to errors later on. Instead of simply failing to mount file system in this case, treat it in a similar way we treat internal journal on read-only device. Allow to mount with -o noload in read-only mode. This can be reproduced easily like this: mke2fs -F -O journal_dev $JOURNAL_DEV 100M mkfs.$FSTYPE -F -J device=$JOURNAL_DEV $FS_DEV blockdev --setro $JOURNAL_DEV mount $FS_DEV $MNT touch $MNT/file umount $MNT leading to error like this [ 1307.318713] ------------[ cut here ]------------ [ 1307.323362] generic_make_request: Trying to write to read-only block-device dm-2 (partno 0) [ 1307.331741] WARNING: CPU: 36 PID: 3224 at block/blk-core.c:855 generic_make_request_checks+0x2c3/0x580 [ 1307.341041] Modules linked in: ext4 mbcache jbd2 rfkill intel_rapl_msr intel_rapl_common isst_if_commd [ 1307.419445] CPU: 36 PID: 3224 Comm: jbd2/dm-2 Tainted: G W I 5.8.0-rc5 #2 [ 1307.427359] Hardware name: Dell Inc. PowerEdge R740/01KPX8, BIOS 2.3.10 08/15/2019 [ 1307.434932] RIP: 0010:generic_make_request_checks+0x2c3/0x580 [ 1307.440676] Code: 94 03 00 00 48 89 df 48 8d 74 24 08 c6 05 cf 2b 18 01 01 e8 7f a4 ff ff 48 c7 c7 50e [ 1307.459420] RSP: 0018:ffffc0d70eb5fb48 EFLAGS: 00010286 [ 1307.464646] RAX: 0000000000000000 RBX: ffff9b33b2978300 RCX: 0000000000000000 [ 1307.471780] RDX: ffff9b33e12a81e0 RSI: ffff9b33e1298000 RDI: ffff9b33e1298000 [ 1307.478913] RBP: ffff9b7b9679e0c0 R08: 0000000000000837 R09: 0000000000000024 [ 1307.486044] R10: 0000000000000000 R11: ffffc0d70eb5f9f0 R12: 0000000000000400 [ 1307.493177] R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000000 [ 1307.500308] FS: 0000000000000000(0000) GS:ffff9b33e1280000(0000) knlGS:0000000000000000 [ 1307.508396] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1307.514142] CR2: 000055eaf4109000 CR3: 0000003dee40a006 CR4: 00000000007606e0 [ 1307.521273] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1307.528407] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1307.535538] PKRU: 55555554 [ 1307.538250] Call Trace: [ 1307.540708] generic_make_request+0x30/0x340 [ 1307.544985] submit_bio+0x43/0x190 [ 1307.548393] ? bio_add_page+0x62/0x90 [ 1307.552068] submit_bh_wbc+0x16a/0x190 [ 1307.555833] jbd2_write_superblock+0xec/0x200 [jbd2] [ 1307.560803] jbd2_journal_update_sb_log_tail+0x65/0xc0 [jbd2] [ 1307.566557] jbd2_journal_commit_transaction+0x2ae/0x1860 [jbd2] [ 1307.572566] ? check_preempt_curr+0x7a/0x90 [ 1307.576756] ? update_curr+0xe1/0x1d0 [ 1307.580421] ? account_entity_dequeue+0x7b/0xb0 [ 1307.584955] ? newidle_balance+0x231/0x3d0 [ 1307.589056] ? __switch_to_asm+0x42/0x70 [ 1307.592986] ? __switch_to_asm+0x36/0x70 [ 1307.596918] ? lock_timer_base+0x67/0x80 [ 1307.600851] kjournald2+0xbd/0x270 [jbd2] [ 1307.604873] ? finish_wait+0x80/0x80 [ 1307.608460] ? commit_timeout+0x10/0x10 [jbd2] [ 1307.612915] kthread+0x114/0x130 [ 1307.616152] ? kthread_park+0x80/0x80 [ 1307.619816] ret_from_fork+0x22/0x30 [ 1307.623400] ---[ end trace 27490236265b1630 ]--- Signed-off-by: Lukas Czerner Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20200717090605.2612-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 51 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9fdad843b30e..dda967efcbc2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5079,6 +5079,7 @@ static int ext4_load_journal(struct super_block *sb, dev_t journal_dev; int err = 0; int really_read_only; + int journal_dev_ro; if (WARN_ON_ONCE(!ext4_has_feature_journal(sb))) return -EFSCORRUPTED; @@ -5091,7 +5092,31 @@ static int ext4_load_journal(struct super_block *sb, } else journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); - really_read_only = bdev_read_only(sb->s_bdev); + if (journal_inum && journal_dev) { + ext4_msg(sb, KERN_ERR, + "filesystem has both journal inode and journal device!"); + return -EINVAL; + } + + if (journal_inum) { + journal = ext4_get_journal(sb, journal_inum); + if (!journal) + return -EINVAL; + } else { + journal = ext4_get_dev_journal(sb, journal_dev); + if (!journal) + return -EINVAL; + } + + journal_dev_ro = bdev_read_only(journal->j_dev); + really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro; + + if (journal_dev_ro && !sb_rdonly(sb)) { + ext4_msg(sb, KERN_ERR, + "journal device read-only, try mounting with '-o ro'"); + err = -EROFS; + goto err_out; + } /* * Are we loading a blank journal or performing recovery after a @@ -5106,27 +5131,14 @@ static int ext4_load_journal(struct super_block *sb, ext4_msg(sb, KERN_ERR, "write access " "unavailable, cannot proceed " "(try mounting with noload)"); - return -EROFS; + err = -EROFS; + goto err_out; } ext4_msg(sb, KERN_INFO, "write access will " "be enabled during recovery"); } } - if (journal_inum && journal_dev) { - ext4_msg(sb, KERN_ERR, "filesystem has both journal " - "and inode journals!"); - return -EINVAL; - } - - if (journal_inum) { - if (!(journal = ext4_get_journal(sb, journal_inum))) - return -EINVAL; - } else { - if (!(journal = ext4_get_dev_journal(sb, journal_dev))) - return -EINVAL; - } - if (!(journal->j_flags & JBD2_BARRIER)) ext4_msg(sb, KERN_INFO, "barriers disabled"); @@ -5146,8 +5158,7 @@ static int ext4_load_journal(struct super_block *sb, if (err) { ext4_msg(sb, KERN_ERR, "error loading journal"); - jbd2_journal_destroy(journal); - return err; + goto err_out; } EXT4_SB(sb)->s_journal = journal; @@ -5167,6 +5178,10 @@ static int ext4_load_journal(struct super_block *sb, } return 0; + +err_out: + jbd2_journal_destroy(journal); + return err; } static int ext4_commit_super(struct super_block *sb, int sync) From cfd73237722135807967f389bcbda558a60a30d6 Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Tue, 21 Apr 2020 10:54:07 +0300 Subject: [PATCH 029/290] ext4: add prefetching for block allocation bitmaps This should significantly improve bitmap loading, especially for flex groups as it tries to load all bitmaps within a flex.group instead of one by one synchronously. Prefetching is done in 8 * flex_bg groups, so it should be 8 read-ahead reads for a single allocating thread. At the end of allocation the thread waits for read-ahead completion and initializes buddy information so that read-aheads are not lost in case of memory pressure. At cr=0 the number of prefetching IOs is limited per allocation context to prevent a situation when mballoc loads thousands of bitmaps looking for a perfect group and ignoring groups with good chunks. Together with the patch "ext4: limit scanning of uninitialized groups" the mount time (which includes few tiny allocations) of a 1PB filesystem is reduced significantly: 0% full 50%-full unpatched patched mount time 33s 9279s 563s [ Restructured by tytso; removed the state flags in the allocation context, so it can be used to lazily prefetch the allocation bitmaps immediately after the file system is mounted. Skip prefetching block groups which are uninitialized. Finally pass in the REQ_RAHEAD flag to the block layer while prefetching. ] Signed-off-by: Alex Zhuravlev Reviewed-by: Andreas Dilger Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 14 +++-- fs/ext4/ext4.h | 8 ++- fs/ext4/mballoc.c | 133 +++++++++++++++++++++++++++++++++++++++++++++- fs/ext4/sysfs.c | 4 ++ 4 files changed, 153 insertions(+), 6 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1ba46d87cdf1..1e2b1b4093aa 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -413,7 +413,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb, * Return buffer_head on success or an ERR_PTR in case of failure. */ struct buffer_head * -ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) +ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group, + bool ignore_locked) { struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -441,6 +442,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) return ERR_PTR(-ENOMEM); } + if (ignore_locked && buffer_locked(bh)) { + /* buffer under IO already, return if called for prefetching */ + put_bh(bh); + return NULL; + } + if (bitmap_uptodate(bh)) goto verify; @@ -490,7 +497,8 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) trace_ext4_read_block_bitmap_load(sb, block_group); bh->b_end_io = ext4_end_bitmap_read; get_bh(bh); - submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO | + (ignore_locked ? REQ_RAHEAD : 0), bh); return bh; verify: err = ext4_validate_block_bitmap(sb, desc, block_group, bh); @@ -534,7 +542,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) struct buffer_head *bh; int err; - bh = ext4_read_block_bitmap_nowait(sb, block_group); + bh = ext4_read_block_bitmap_nowait(sb, block_group, false); if (IS_ERR(bh)) return bh; err = ext4_wait_block_bitmap(sb, block_group, bh); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index dd5e6c645442..4fba138b8722 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1521,6 +1521,8 @@ struct ext4_sb_info { /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; unsigned long s_mb_last_start; + unsigned int s_mb_prefetch; + unsigned int s_mb_prefetch_limit; /* stats for buddy allocator */ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ @@ -2462,7 +2464,8 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, - ext4_group_t block_group); + ext4_group_t block_group, + bool ignore_locked); extern int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, struct buffer_head *bh); @@ -3161,6 +3164,7 @@ struct ext4_group_info { (1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT) #define EXT4_GROUP_INFO_IBITMAP_CORRUPT \ (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT) +#define EXT4_GROUP_INFO_BBITMAP_READ_BIT 4 #define EXT4_MB_GRP_NEED_INIT(grp) \ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) @@ -3175,6 +3179,8 @@ struct ext4_group_info { (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) #define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) +#define EXT4_MB_GRP_TEST_AND_SET_READ(grp) \ + (test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state))) #define EXT4_MAX_CONTENTION 8 #define EXT4_CONTENTION_THRESHOLD 2 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6dc2c6c535ef..c350ae9cb0d9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -922,7 +922,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) bh[i] = NULL; continue; } - bh[i] = ext4_read_block_bitmap_nowait(sb, group); + bh[i] = ext4_read_block_bitmap_nowait(sb, group, false); if (IS_ERR(bh[i])) { err = PTR_ERR(bh[i]); bh[i] = NULL; @@ -2209,12 +2209,93 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, return ret; } +/* + * Start prefetching @nr block bitmaps starting at @group. + * Return the next group which needs to be prefetched. + */ +static ext4_group_t +ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, + unsigned int nr, int *cnt) +{ + ext4_group_t ngroups = ext4_get_groups_count(sb); + struct buffer_head *bh; + struct blk_plug plug; + + blk_start_plug(&plug); + while (nr-- > 0) { + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, + NULL); + struct ext4_group_info *grp = ext4_get_group_info(sb, group); + + /* + * Prefetch block groups with free blocks; but don't + * bother if it is marked uninitialized on disk, since + * it won't require I/O to read. Also only try to + * prefetch once, so we avoid getblk() call, which can + * be expensive. + */ + if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) && + EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { + bh = ext4_read_block_bitmap_nowait(sb, group, true); + if (bh && !IS_ERR(bh)) { + if (!buffer_uptodate(bh) && cnt) + (*cnt)++; + brelse(bh); + } + } + if (++group >= ngroups) + group = 0; + } + blk_finish_plug(&plug); + return group; +} + +/* + * Prefetching reads the block bitmap into the buffer cache; but we + * need to make sure that the buddy bitmap in the page cache has been + * initialized. Note that ext4_mb_init_group() will block if the I/O + * is not yet completed, or indeed if it was not initiated by + * ext4_mb_prefetch did not start the I/O. + * + * TODO: We should actually kick off the buddy bitmap setup in a work + * queue when the buffer I/O is completed, so that we don't block + * waiting for the block allocation bitmap read to finish when + * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator(). + */ +static void +ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, + unsigned int nr) +{ + while (nr-- > 0) { + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, + NULL); + struct ext4_group_info *grp = ext4_get_group_info(sb, group); + + if (!group) + group = ext4_get_groups_count(sb); + group--; + grp = ext4_get_group_info(sb, group); + + if (EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { + if (ext4_mb_init_group(sb, group, GFP_NOFS)) + break; + } + } +} + static noinline_for_stack int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { - ext4_group_t ngroups, group, i; + ext4_group_t prefetch_grp = 0, ngroups, group, i; int cr = -1; int err = 0, first_err = 0; + unsigned int nr = 0, prefetch_ios = 0; struct ext4_sb_info *sbi; struct super_block *sb; struct ext4_buddy e4b; @@ -2282,6 +2363,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) * from the goal value specified */ group = ac->ac_g_ex.fe_group; + prefetch_grp = group; for (i = 0; i < ngroups; group++, i++) { int ret = 0; @@ -2293,6 +2375,29 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) if (group >= ngroups) group = 0; + /* + * Batch reads of the block allocation bitmaps + * to get multiple READs in flight; limit + * prefetching at cr=0/1, otherwise mballoc can + * spend a lot of time loading imperfect groups + */ + if ((prefetch_grp == group) && + (cr > 1 || + prefetch_ios < sbi->s_mb_prefetch_limit)) { + unsigned int curr_ios = prefetch_ios; + + nr = sbi->s_mb_prefetch; + if (ext4_has_feature_flex_bg(sb)) { + nr = (group / sbi->s_mb_prefetch) * + sbi->s_mb_prefetch; + nr = nr + sbi->s_mb_prefetch - group; + } + prefetch_grp = ext4_mb_prefetch(sb, group, + nr, &prefetch_ios); + if (prefetch_ios == curr_ios) + nr = 0; + } + /* This now checks without needing the buddy page */ ret = ext4_mb_good_group_nolock(ac, group, cr); if (ret <= 0) { @@ -2367,6 +2472,10 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n", ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status, ac->ac_flags, cr, err); + + if (nr) + ext4_mb_prefetch_fini(sb, prefetch_grp, nr); + return err; } @@ -2613,6 +2722,26 @@ static int ext4_mb_init_backend(struct super_block *sb) goto err_freebuddy; } + if (ext4_has_feature_flex_bg(sb)) { + /* a single flex group is supposed to be read by a single IO */ + sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex; + sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ + } else { + sbi->s_mb_prefetch = 32; + } + if (sbi->s_mb_prefetch > ext4_get_groups_count(sb)) + sbi->s_mb_prefetch = ext4_get_groups_count(sb); + /* now many real IOs to prefetch within a single allocation at cr=0 + * given cr=0 is an CPU-related optimization we shouldn't try to + * load too many groups, at some point we should start to use what + * we've got in memory. + * with an average random access time 5ms, it'd take a second to get + * 200 groups (* N with flex_bg), so let's make this limit 4 + */ + sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4; + if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb)) + sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb); + return 0; err_freebuddy: diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 6c9fc9e21c13..31e0db726d21 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -240,6 +240,8 @@ EXT4_RO_ATTR_ES_STRING(last_error_func, s_last_error_func, 32); EXT4_ATTR(first_error_time, 0444, first_error_time); EXT4_ATTR(last_error_time, 0444, last_error_time); EXT4_ATTR(journal_task, 0444, journal_task); +EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch); +EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit); static unsigned int old_bump_val = 128; EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); @@ -283,6 +285,8 @@ static struct attribute *ext4_attrs[] = { #ifdef CONFIG_EXT4_DEBUG ATTR_LIST(simulate_fail), #endif + ATTR_LIST(mb_prefetch), + ATTR_LIST(mb_prefetch_limit), NULL, }; ATTRIBUTE_GROUPS(ext4); From c1d2c7d47e15482bb23cda83a5021e60f624a09c Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Fri, 19 Jun 2020 22:08:56 -0400 Subject: [PATCH 030/290] ext4: skip non-loaded groups at cr=0/1 when scanning for good groups cr=0 is supposed to be an optimization to save CPU cycles, but if buddy data (in memory) is not initialized then all this makes no sense as we have to do sync IO taking a lot of cycles. Also, at cr=0 mballoc doesn't choose any available chunk. cr=1 also skips groups using heuristic based on avg. fragment size. It's more useful to skip such groups and switch to cr=2 where groups will be scanned for available chunks. However, we always read the first block group in a flex_bg so metadata blocks will get read into the first flex_bg if possible. Using sparse image and dm-slow virtual device of 120TB was simulated, then the image was formatted and filled using debugfs to mark ~85% of available space as busy. mount process w/o the patch couldn't complete in half an hour (according to vmstat it would take ~10-11 hours). With the patch applied mount took ~20 seconds. Lustre-bug-id: https://jira.whamcloud.com/browse/LU-12988 Signed-off-by: Alex Zhuravlev Reviewed-by: Andreas Dilger Reviewed-by: Artem Blagodarenko --- fs/ext4/mballoc.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c350ae9cb0d9..9a07da53ab7b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2177,6 +2177,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, { struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); struct super_block *sb = ac->ac_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK; ext4_grpblk_t free; int ret = 0; @@ -2195,7 +2196,25 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { - ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS); + struct ext4_group_desc *gdp = + ext4_get_group_desc(sb, group, NULL); + int ret; + + /* cr=0/1 is a very optimistic search to find large + * good chunks almost for free. If buddy data is not + * ready, then this optimization makes no sense. But + * we never skip the first block group in a flex_bg, + * since this gets used for metadata block allocation, + * and we want to make sure we locate metadata blocks + * in the first block group in the flex_bg if possible. + */ + if (cr < 2 && + (!sbi->s_log_groups_per_flex || + ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) && + !(ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) + return 0; + ret = ext4_mb_init_group(sb, group, GFP_NOFS); if (ret) return ret; } From 8b6ec999b198b59ae61e86e70f5e9df73fe4754f Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Tue, 21 Jul 2020 18:16:47 +0800 Subject: [PATCH 031/290] drm/virtio: fix missing dma_fence_put() in virtio_gpu_execbuffer_ioctl() We should put the reference count of the fence after calling virtio_gpu_cmd_submit(). So add the missing dma_fence_put(). Fixes: 2cd7b6f08bc4 ("drm/virtio: add in/out fence support for explicit synchronization") Co-developed-by: Xin He Signed-off-by: Xin He Signed-off-by: Qi Liu Reviewed-by: Muchun Song Link: http://patchwork.freedesktop.org/patch/msgid/20200721101647.42653-1-hexin.op@bytedance.com Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 5df722072ba0..19c5bc01eb79 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -179,6 +179,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, virtio_gpu_cmd_submit(vgdev, buf, exbuf->size, vfpriv->ctx_id, buflist, out_fence); + dma_fence_put(&out_fence->f); virtio_gpu_notify(vgdev); return 0; From 836b194d65782aaec4485a07d2aab52d3f698505 Mon Sep 17 00:00:00 2001 From: Xin He Date: Wed, 22 Jul 2020 13:18:51 +0800 Subject: [PATCH 032/290] drm/virtio: fix memory leak in virtio_gpu_cleanup_object() Before setting shmem->pages to NULL, kfree() should be called. Signed-off-by: Xin He Reviewed-by: Qi Liu Link: http://patchwork.freedesktop.org/patch/msgid/20200722051851.72662-1-hexin.op@bytedance.com Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/virtio/virtgpu_object.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 6ccbd01cd888..703b5cd51751 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -79,6 +79,7 @@ void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo) } sg_free_table(shmem->pages); + kfree(shmem->pages); shmem->pages = NULL; drm_gem_shmem_unpin(&bo->base.base); } From bc71726c725767205757821df364acff87f92ac5 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sat, 20 Jun 2020 10:54:23 +0800 Subject: [PATCH 033/290] ext4: abort the filesystem if failed to async write metadata buffer There is a risk of filesystem inconsistency if we failed to async write back metadata buffer in the background. Because of current buffer's end io procedure is handled by end_buffer_async_write() in the block layer, and it only clear the buffer's uptodate flag and mark the write_io_error flag, so ext4 cannot detect such failure immediately. In most cases of getting metadata buffer (e.g. ext4_read_inode_bitmap()), although the buffer's data is actually uptodate, it may still read data from disk because the buffer's uptodate flag has been cleared. Finally, it may lead to on-disk filesystem inconsistency if reading old data from the disk successfully and write them out again. This patch detect bdev mapping->wb_err when getting journal's write access and mark the filesystem error if bdev's mapping->wb_err was increased, this could prevent further writing and potential inconsistency. Signed-off-by: zhangyi (F) Suggested-by: Jan Kara Link: https://lore.kernel.org/r/20200620025427.1756360-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 +++ fs/ext4/ext4_jbd2.c | 25 +++++++++++++++++++++++++ fs/ext4/super.c | 17 +++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4fba138b8722..26ae31a994a2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1603,6 +1603,9 @@ struct ext4_sb_info { #ifdef CONFIG_EXT4_DEBUG unsigned long s_simulate_fail; #endif + /* Record the errseq of the backing block device */ + errseq_t s_bdev_wb_err; + spinlock_t s_bdev_wb_lock; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 0c76cdd44d90..760b9ee49dc0 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -195,6 +195,28 @@ static void ext4_journal_abort_handle(const char *caller, unsigned int line, jbd2_journal_abort_handle(handle); } +static void ext4_check_bdev_write_error(struct super_block *sb) +{ + struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + struct ext4_sb_info *sbi = EXT4_SB(sb); + int err; + + /* + * If the block device has write error flag, it may have failed to + * async write out metadata buffers in the background. In this case, + * we could read old data from disk and write it out again, which + * may lead to on-disk filesystem inconsistency. + */ + if (errseq_check(&mapping->wb_err, READ_ONCE(sbi->s_bdev_wb_err))) { + spin_lock(&sbi->s_bdev_wb_lock); + err = errseq_check_and_advance(&mapping->wb_err, &sbi->s_bdev_wb_err); + spin_unlock(&sbi->s_bdev_wb_lock); + if (err) + ext4_error_err(sb, -err, + "Error while async write back metadata"); + } +} + int __ext4_journal_get_write_access(const char *where, unsigned int line, handle_t *handle, struct buffer_head *bh) { @@ -202,6 +224,9 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, might_sleep(); + if (bh->b_bdev->bd_super) + ext4_check_bdev_write_error(bh->b_bdev->bd_super); + if (ext4_handle_valid(handle)) { err = jbd2_journal_get_write_access(handle, bh); if (err) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dda967efcbc2..c77b10257b36 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4765,6 +4765,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } #endif /* CONFIG_QUOTA */ + /* + * Save the original bdev mapping's wb_err value which could be + * used to detect the metadata async write error. + */ + spin_lock_init(&sbi->s_bdev_wb_lock); + if (!sb_rdonly(sb)) + errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err, + &sbi->s_bdev_wb_err); + sb->s_bdev->bd_super = sb; EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; @@ -5654,6 +5663,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + /* + * Update the original bdev mapping's wb_err value + * which could be used to detect the metadata async + * write error. + */ + errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err, + &sbi->s_bdev_wb_err); + /* * Mounting a RDONLY partition read-write, so reread * and store the current valid flag. (It may have From c044f3d8360d2ecf831ba2cc9f08cf9fb2c699fb Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sat, 20 Jun 2020 10:54:26 +0800 Subject: [PATCH 034/290] jbd2: abort journal if free a async write error metadata buffer If we free a metadata buffer which has been failed to async write out in the background, the jbd2 checkpoint procedure will not detect this failure in jbd2_log_do_checkpoint(), so it may lead to filesystem inconsistency after cleanup journal tail. This patch abort the journal if free a buffer has write_io_error flag to prevent potential further inconsistency. Signed-off-by: zhangyi (F) Link: https://lore.kernel.org/r/20200620025427.1756360-5-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e65e0aca2826..6250c9faa4cb 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2120,6 +2120,7 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, { struct buffer_head *head; struct buffer_head *bh; + bool has_write_io_error = false; int ret = 0; J_ASSERT(PageLocked(page)); @@ -2144,11 +2145,26 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, jbd2_journal_put_journal_head(jh); if (buffer_jbd(bh)) goto busy; + + /* + * If we free a metadata buffer which has been failed to + * write out, the jbd2 checkpoint procedure will not detect + * this failure and may lead to filesystem inconsistency + * after cleanup journal tail. + */ + if (buffer_write_io_error(bh)) { + pr_err("JBD2: Error while async write back metadata bh %llu.", + (unsigned long long)bh->b_blocknr); + has_write_io_error = true; + } } while ((bh = bh->b_this_page) != head); ret = try_to_free_buffers(page); busy: + if (has_write_io_error) + jbd2_journal_abort(journal, -EIO); + return ret; } From 529a781ee07aaa58be8164d75ba5998eb7dd216c Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sat, 20 Jun 2020 10:54:27 +0800 Subject: [PATCH 035/290] jbd2: remove unused parameter in jbd2_journal_try_to_free_buffers() Parameter gfp_mask in jbd2_journal_try_to_free_buffers() is no longer used after commit <536fc240e7147> ("jbd2: clean up jbd2_journal_try_to_free_buffers()"), so just remove it. Signed-off-by: zhangyi (F) Link: https://lore.kernel.org/r/20200620025427.1756360-6-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 +- fs/ext4/super.c | 4 ++-- fs/jbd2/transaction.c | 7 +------ include/linux/jbd2.h | 2 +- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6187c8880c02..551a1056870d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3288,7 +3288,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait) if (PageChecked(page)) return 0; if (journal) - return jbd2_journal_try_to_free_buffers(journal, page, wait); + return jbd2_journal_try_to_free_buffers(journal, page); else return try_to_free_buffers(page); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c77b10257b36..8c00f0a09f4d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1288,8 +1288,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, if (!page_has_buffers(page)) return 0; if (journal) - return jbd2_journal_try_to_free_buffers(journal, page, - wait & ~__GFP_DIRECT_RECLAIM); + return jbd2_journal_try_to_free_buffers(journal, page); + return try_to_free_buffers(page); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6250c9faa4cb..43985738aa86 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2081,10 +2081,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) * int jbd2_journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation * @page: to try and free - * @gfp_mask: we use the mask to detect how hard should we try to release - * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit - * code to release the buffers. - * * * For all the buffers on this page, * if they are fully written out ordered data, move them onto BUF_CLEAN @@ -2115,8 +2111,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) * * Return 0 on failure, 1 on success */ -int jbd2_journal_try_to_free_buffers(journal_t *journal, - struct page *page, gfp_t gfp_mask) +int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page) { struct buffer_head *head; struct buffer_head *bh; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d56128df2aff..a756a4cdf939 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1380,7 +1380,7 @@ extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern int jbd2_journal_invalidatepage(journal_t *, struct page *, unsigned int, unsigned int); -extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); +extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page); extern int jbd2_journal_stop(handle_t *); extern int jbd2_journal_flush (journal_t *); extern void jbd2_journal_lock_updates (journal_t *); From ab74c7b23f3770935016e3eb3ecdf1e42b73efaa Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 15 Jul 2020 11:48:55 -0400 Subject: [PATCH 036/290] ext4: indicate via a block bitmap read is prefetched via a tracepoint Modify the ext4_read_block_bitmap_load tracepoint so that it tells us whether a block bitmap is being prefetched. Signed-off-by: Theodore Ts'o Reviewed-by: Artem Blagodarenko --- fs/ext4/balloc.c | 2 +- include/trace/events/ext4.h | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1e2b1b4093aa..48c3df47748d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -494,7 +494,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group, * submit the buffer_head for reading */ set_buffer_new(bh); - trace_ext4_read_block_bitmap_load(sb, block_group); + trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked); bh->b_end_io = ext4_end_bitmap_read; get_bh(bh); submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO | diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index cc41d692ae8e..cbcd2e1a608d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1312,18 +1312,34 @@ DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load, TP_ARGS(sb, group) ); -DEFINE_EVENT(ext4__bitmap_load, ext4_read_block_bitmap_load, +DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap, TP_PROTO(struct super_block *sb, unsigned long group), TP_ARGS(sb, group) ); -DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap, +TRACE_EVENT(ext4_read_block_bitmap_load, + TP_PROTO(struct super_block *sb, unsigned long group, bool prefetch), - TP_PROTO(struct super_block *sb, unsigned long group), + TP_ARGS(sb, group, prefetch), - TP_ARGS(sb, group) + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( __u32, group ) + __field( bool, prefetch ) + + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->group = group; + __entry->prefetch = prefetch; + ), + + TP_printk("dev %d,%d group %u prefetch %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->group, __entry->prefetch) ); TRACE_EVENT(ext4_direct_IO_enter, From 3d392b2676bf3199863a1e5efb2c087ad9d442a4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Jul 2020 00:14:40 -0400 Subject: [PATCH 037/290] ext4: add prefetch_block_bitmaps mount option For file systems where we can afford to keep the buddy bitmaps cached, we can speed up initial writes to large file systems by starting to load the block allocation bitmaps as soon as the file system is mounted. This won't work well for _super_ large file systems, or memory constrained systems, so we only enable this when it is requested via a mount option. Addresses-Google-Bug: 159488342 Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger --- fs/ext4/ext4.h | 15 +++++++++- fs/ext4/mballoc.c | 10 +++---- fs/ext4/super.c | 57 ++++++++++++++++++++++++++----------- include/trace/events/ext4.h | 44 ++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 23 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 26ae31a994a2..e5f0c66a6156 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1188,6 +1188,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ +#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ @@ -2334,9 +2335,15 @@ struct ext4_lazy_init { struct mutex li_list_mtx; }; +enum ext4_li_mode { + EXT4_LI_MODE_PREFETCH_BBITMAP, + EXT4_LI_MODE_ITABLE, +}; + struct ext4_li_request { struct super_block *lr_super; - struct ext4_sb_info *lr_sbi; + enum ext4_li_mode lr_mode; + ext4_group_t lr_first_not_zeroed; ext4_group_t lr_next_group; struct list_head lr_request; unsigned long lr_next_sched; @@ -2676,6 +2683,12 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int); extern void ext4_discard_preallocations(struct inode *); extern int __init ext4_init_mballoc(void); extern void ext4_exit_mballoc(void); +extern ext4_group_t ext4_mb_prefetch(struct super_block *sb, + ext4_group_t group, + unsigned int nr, int *cnt); +extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, + unsigned int nr); + extern void ext4_free_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t block, unsigned long count, int flags); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9a07da53ab7b..8ecd49dd3906 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2232,9 +2232,8 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, * Start prefetching @nr block bitmaps starting at @group. * Return the next group which needs to be prefetched. */ -static ext4_group_t -ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, - unsigned int nr, int *cnt) +ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, + unsigned int nr, int *cnt) { ext4_group_t ngroups = ext4_get_groups_count(sb); struct buffer_head *bh; @@ -2284,9 +2283,8 @@ ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, * waiting for the block allocation bitmap read to finish when * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator(). */ -static void -ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, - unsigned int nr) +void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, + unsigned int nr) { while (nr-- > 0) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8c00f0a09f4d..1cbe3f248faf 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1521,6 +1521,7 @@ enum { Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, + Opt_prefetch_block_bitmaps, }; static const match_table_t tokens = { @@ -1612,6 +1613,7 @@ static const match_table_t tokens = { {Opt_test_dummy_encryption, "test_dummy_encryption"}, {Opt_nombcache, "nombcache"}, {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ + {Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"}, {Opt_removed, "check=none"}, /* mount option from ext2/3 */ {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ {Opt_removed, "reservation"}, /* mount option from ext2/3 */ @@ -1829,6 +1831,8 @@ static const struct mount_opts { {Opt_max_dir_size_kb, 0, MOPT_GTE0}, {Opt_test_dummy_encryption, 0, MOPT_STRING}, {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, + {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS, + MOPT_SET}, {Opt_err, 0, 0} }; @@ -3201,15 +3205,34 @@ static void print_daily_error_info(struct timer_list *t) static int ext4_run_li_request(struct ext4_li_request *elr) { struct ext4_group_desc *gdp = NULL; - ext4_group_t group, ngroups; - struct super_block *sb; + struct super_block *sb = elr->lr_super; + ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t group = elr->lr_next_group; unsigned long timeout = 0; + unsigned int prefetch_ios = 0; int ret = 0; - sb = elr->lr_super; - ngroups = EXT4_SB(sb)->s_groups_count; + if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) { + elr->lr_next_group = ext4_mb_prefetch(sb, group, + EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios); + if (prefetch_ios) + ext4_mb_prefetch_fini(sb, elr->lr_next_group, + prefetch_ios); + trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, + prefetch_ios); + if (group >= elr->lr_next_group) { + ret = 1; + if (elr->lr_first_not_zeroed != ngroups && + !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) { + elr->lr_next_group = elr->lr_first_not_zeroed; + elr->lr_mode = EXT4_LI_MODE_ITABLE; + ret = 0; + } + } + return ret; + } - for (group = elr->lr_next_group; group < ngroups; group++) { + for (; group < ngroups; group++) { gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) { ret = 1; @@ -3227,9 +3250,10 @@ static int ext4_run_li_request(struct ext4_li_request *elr) timeout = jiffies; ret = ext4_init_inode_table(sb, group, elr->lr_timeout ? 0 : 1); + trace_ext4_lazy_itable_init(sb, group); if (elr->lr_timeout == 0) { timeout = (jiffies - timeout) * - elr->lr_sbi->s_li_wait_mult; + EXT4_SB(elr->lr_super)->s_li_wait_mult; elr->lr_timeout = timeout; } elr->lr_next_sched = jiffies + elr->lr_timeout; @@ -3244,15 +3268,11 @@ static int ext4_run_li_request(struct ext4_li_request *elr) */ static void ext4_remove_li_request(struct ext4_li_request *elr) { - struct ext4_sb_info *sbi; - if (!elr) return; - sbi = elr->lr_sbi; - list_del(&elr->lr_request); - sbi->s_li_request = NULL; + EXT4_SB(elr->lr_super)->s_li_request = NULL; kfree(elr); } @@ -3461,7 +3481,6 @@ static int ext4_li_info_new(void) static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, ext4_group_t start) { - struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_li_request *elr; elr = kzalloc(sizeof(*elr), GFP_KERNEL); @@ -3469,8 +3488,13 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, return NULL; elr->lr_super = sb; - elr->lr_sbi = sbi; - elr->lr_next_group = start; + elr->lr_first_not_zeroed = start; + if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) + elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP; + else { + elr->lr_mode = EXT4_LI_MODE_ITABLE; + elr->lr_next_group = start; + } /* * Randomize first schedule time of the request to @@ -3500,8 +3524,9 @@ int ext4_register_li_request(struct super_block *sb, goto out; } - if (first_not_zeroed == ngroups || sb_rdonly(sb) || - !test_opt(sb, INIT_INODE_TABLE)) + if (!test_opt(sb, PREFETCH_BLOCK_BITMAPS) && + (first_not_zeroed == ngroups || sb_rdonly(sb) || + !test_opt(sb, INIT_INODE_TABLE))) goto out; elr = ext4_li_request_new(sb, first_not_zeroed); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index cbcd2e1a608d..8008d2e116b9 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2742,6 +2742,50 @@ TRACE_EVENT(ext4_error, __entry->function, __entry->line) ); +TRACE_EVENT(ext4_prefetch_bitmaps, + TP_PROTO(struct super_block *sb, ext4_group_t group, + ext4_group_t next, unsigned int prefetch_ios), + + TP_ARGS(sb, group, next, prefetch_ios), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( __u32, group ) + __field( __u32, next ) + __field( __u32, ios ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->group = group; + __entry->next = next; + __entry->ios = prefetch_ios; + ), + + TP_printk("dev %d,%d group %u next %u ios %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->group, __entry->next, __entry->ios) +); + +TRACE_EVENT(ext4_lazy_itable_init, + TP_PROTO(struct super_block *sb, ext4_group_t group), + + TP_ARGS(sb, group), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( __u32, group ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->group = group; + ), + + TP_printk("dev %d,%d group %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ From 60ed633f51d0c675150a117d96a45e78c3613f91 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Sat, 18 Jul 2020 08:57:37 -0400 Subject: [PATCH 038/290] jbd2: fix incorrect code style Remove unnecessary blank. Signed-off-by: Xianting Tian Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1595077057-8048-1-git-send-email-xianting_tian@126.com Signed-off-by: Theodore Ts'o --- fs/jbd2/journal.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5493a0da23dd..17fdc482f554 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1285,7 +1285,7 @@ journal_t *jbd2_journal_init_inode(struct inode *inode) * superblock as being NULL to prevent the journal destroy from writing * back a bogus superblock. */ -static void journal_fail_superblock (journal_t *journal) +static void journal_fail_superblock(journal_t *journal) { struct buffer_head *bh = journal->j_sb_buffer; brelse(bh); @@ -1817,7 +1817,7 @@ int jbd2_journal_destroy(journal_t *journal) /** - *int jbd2_journal_check_used_features () - Check if features specified are used. + *int jbd2_journal_check_used_features() - Check if features specified are used. * @journal: Journal to check. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -1827,7 +1827,7 @@ int jbd2_journal_destroy(journal_t *journal) * features. Return true (non-zero) if it does. **/ -int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, +int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { journal_superblock_t *sb; @@ -1862,7 +1862,7 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, * all of a given set of features on this journal. Return true * (non-zero) if it can. */ -int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, +int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { if (!compat && !ro && !incompat) @@ -1884,7 +1884,7 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com } /** - * int jbd2_journal_set_features () - Mark a given journal feature in the superblock + * int jbd2_journal_set_features() - Mark a given journal feature in the superblock * @journal: Journal to act on. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -1895,7 +1895,7 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com * */ -int jbd2_journal_set_features (journal_t *journal, unsigned long compat, +int jbd2_journal_set_features(journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { #define INCOMPAT_FEATURE_ON(f) \ From f25391ebb475d3ffb3aa61bb90e3594c841749ef Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Thu, 23 Jul 2020 17:05:26 +0200 Subject: [PATCH 039/290] ext4: handle option set by mount flags correctly Currently there is a problem with mount options that can be both set by vfs using mount flags or by a string parsing in ext4. i_version/iversion options gets lost after remount, for example $ mount -o i_version /dev/pmem0 /mnt $ grep pmem0 /proc/self/mountinfo | grep i_version 310 95 259:0 / /mnt rw,relatime shared:163 - ext4 /dev/pmem0 rw,seclabel,i_version $ mount -o remount,ro /mnt $ grep pmem0 /proc/self/mountinfo | grep i_version nolazytime gets ignored by ext4 on remount, for example $ mount -o lazytime /dev/pmem0 /mnt $ grep pmem0 /proc/self/mountinfo | grep lazytime 310 95 259:0 / /mnt rw,relatime shared:163 - ext4 /dev/pmem0 rw,lazytime,seclabel $ mount -o remount,nolazytime /mnt $ grep pmem0 /proc/self/mountinfo | grep lazytime 310 95 259:0 / /mnt rw,relatime shared:163 - ext4 /dev/pmem0 rw,lazytime,seclabel Fix it by applying the SB_LAZYTIME and SB_I_VERSION flags from *flags to s_flags before we parse the option and use the resulting state of the same flags in *flags at the end of successful remount. Signed-off-by: Lukas Czerner Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/20200723150526.19931-1-lczerner@redhat.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1cbe3f248faf..a6de88fc7060 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5512,7 +5512,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) { struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb); - unsigned long old_sb_flags; + unsigned long old_sb_flags, vfs_flags; struct ext4_mount_options old_opts; int enable_quota = 0; ext4_group_t g; @@ -5555,6 +5555,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal && sbi->s_journal->j_task->io_context) journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; + /* + * Some options can be enabled by ext4 and/or by VFS mount flag + * either way we need to make sure it matches in both *flags and + * s_flags. Copy those selected flags from *flags to s_flags + */ + vfs_flags = SB_LAZYTIME | SB_I_VERSION; + sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags); + if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { err = -EINVAL; goto restore_opts; @@ -5608,9 +5616,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } - if (*flags & SB_LAZYTIME) - sb->s_flags |= SB_LAZYTIME; - if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; @@ -5758,7 +5763,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } #endif - *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); + /* + * Some options can be enabled by ext4 and/or by VFS mount flag + * either way we need to make sure it matches in both *flags and + * s_flags. Copy those selected flags from s_flags to *flags + */ + *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags); + ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); kfree(orig_data); return 0; From 6dbd300129089ad57de20be89ab7c122b36a4c26 Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Thu, 23 Jul 2020 23:29:54 -0400 Subject: [PATCH 040/290] ext4: remove some redundant function declarations ext4 update feature functions do not exist now, remove these useless function declarations. Signed-off-by: Shijie Luo Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/20200724032954.22097-1-luoshijie1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e5f0c66a6156..32260ceb9cb5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2959,12 +2959,6 @@ do { \ #endif -extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, - __u32 compat); -extern int ext4_update_rocompat_feature(handle_t *handle, - struct super_block *sb, __u32 rocompat); -extern int ext4_update_incompat_feature(handle_t *handle, - struct super_block *sb, __u32 incompat); extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg); extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, From 1cf006ed19a887c22e085189c8b4a3cbf60d2246 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sat, 25 Jul 2020 12:33:13 +0000 Subject: [PATCH 041/290] ext4: export msg_count and warning_count via sysfs This numbers can be analized by system automation similar to errors_count. In ideal world it would be nice to have separate counters for different log-levels, but this makes this patch too intrusive. Signed-off-by: Dmitry Monakhov Link: https://lore.kernel.org/r/20200725123313.4467-1-dmtrmonakhov@yandex-team.ru Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 ++ fs/ext4/super.c | 12 +++++++++--- fs/ext4/sysfs.c | 7 +++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 32260ceb9cb5..c73b9a1959a9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1591,6 +1591,8 @@ struct ext4_sb_info { struct ratelimit_state s_err_ratelimit_state; struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; + atomic_t s_warning_count; + atomic_t s_msg_count; /* Encryption context for '-o test_dummy_encryption' */ struct fscrypt_dummy_context s_dummy_enc_ctx; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a6de88fc7060..7deb2b75fcd0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -744,6 +744,7 @@ void __ext4_msg(struct super_block *sb, struct va_format vaf; va_list args; + atomic_inc(&EXT4_SB(sb)->s_msg_count); if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) return; @@ -754,9 +755,12 @@ void __ext4_msg(struct super_block *sb, va_end(args); } -#define ext4_warning_ratelimit(sb) \ - ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \ - "EXT4-fs warning") +static int ext4_warning_ratelimit(struct super_block *sb) +{ + atomic_inc(&EXT4_SB(sb)->s_warning_count); + return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), + "EXT4-fs warning"); +} void __ext4_warning(struct super_block *sb, const char *function, unsigned int line, const char *fmt, ...) @@ -4840,6 +4844,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); + atomic_set(&sbi->s_warning_count, 0); + atomic_set(&sbi->s_msg_count, 0); kfree(orig_data); return 0; diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 31e0db726d21..7fee11cc30e7 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -189,6 +189,9 @@ static struct ext4_attr ext4_attr_##_name = { \ #define EXT4_RW_ATTR_SBI_UL(_name,_elname) \ EXT4_ATTR_OFFSET(_name, 0644, pointer_ul, ext4_sb_info, _elname) +#define EXT4_RO_ATTR_SBI_ATOMIC(_name,_elname) \ + EXT4_ATTR_OFFSET(_name, 0444, pointer_atomic, ext4_sb_info, _elname) + #define EXT4_ATTR_PTR(_name,_mode,_id,_ptr) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ @@ -226,6 +229,8 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); #ifdef CONFIG_EXT4_DEBUG EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail); #endif +EXT4_RO_ATTR_SBI_ATOMIC(warning_count, s_warning_count); +EXT4_RO_ATTR_SBI_ATOMIC(msg_count, s_msg_count); EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); EXT4_RO_ATTR_ES_U8(first_error_errcode, s_first_error_errcode); EXT4_RO_ATTR_ES_U8(last_error_errcode, s_last_error_errcode); @@ -269,6 +274,8 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(msg_ratelimit_interval_ms), ATTR_LIST(msg_ratelimit_burst), ATTR_LIST(errors_count), + ATTR_LIST(warning_count), + ATTR_LIST(msg_count), ATTR_LIST(first_error_ino), ATTR_LIST(last_error_ino), ATTR_LIST(first_error_block), From 9375ac770cda6bdff829f43868d0ebcaba41f675 Mon Sep 17 00:00:00 2001 From: brookxu Date: Mon, 27 Jul 2020 09:54:14 +0800 Subject: [PATCH 042/290] ext4: delete the invalid BUGON in ext4_mb_load_buddy_gfp() Delete the invalid BUGON in ext4_mb_load_buddy_gfp(), the previous code has already judged whether page is NULL. Signed-off-by: Chunguang Xu Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/ad68e8a2-5ec3-5beb-537f-f3e53f55367a@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8ecd49dd3906..19101941dadd 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1279,9 +1279,6 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, e4b->bd_buddy_page = page; e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); - BUG_ON(e4b->bd_bitmap_page == NULL); - BUG_ON(e4b->bd_buddy_page == NULL); - return 0; err: From d176b1f62f242ab259ff665a26fbac69db1aecba Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 28 Jul 2020 15:04:32 +0200 Subject: [PATCH 043/290] ext4: handle error of ext4_setup_system_zone() on remount ext4_setup_system_zone() can fail. Handle the failure in ext4_remount(). Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200728130437.7804-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7deb2b75fcd0..6507e30e58b5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5747,7 +5747,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_register_li_request(sb, first_not_zeroed); } - ext4_setup_system_zone(sb); + err = ext4_setup_system_zone(sb); + if (err) + goto restore_opts; + if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) { err = ext4_commit_super(sb, 1); if (err) From bf9a379d0980e7413d94cb18dac73db2bfc5f470 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 28 Jul 2020 15:04:33 +0200 Subject: [PATCH 044/290] ext4: don't allow overlapping system zones Currently, add_system_zone() just silently merges two added system zones that overlap. However the overlap should not happen and it generally suggests that some unrelated metadata overlap which indicates the fs is corrupted. We should have caught such problems earlier (e.g. in ext4_check_descriptors()) but add this check as another line of defense. In later patch we also use this for stricter checking of journal inode extent tree. Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200728130437.7804-3-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 16e9b2fda03a..b394a50ebbe3 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -68,7 +68,7 @@ static int add_system_zone(struct ext4_system_blocks *system_blks, ext4_fsblk_t start_blk, unsigned int count) { - struct ext4_system_zone *new_entry = NULL, *entry; + struct ext4_system_zone *new_entry, *entry; struct rb_node **n = &system_blks->root.rb_node, *node; struct rb_node *parent = NULL, *new_node = NULL; @@ -79,30 +79,20 @@ static int add_system_zone(struct ext4_system_blocks *system_blks, n = &(*n)->rb_left; else if (start_blk >= (entry->start_blk + entry->count)) n = &(*n)->rb_right; - else { - if (start_blk + count > (entry->start_blk + - entry->count)) - entry->count = (start_blk + count - - entry->start_blk); - new_node = *n; - new_entry = rb_entry(new_node, struct ext4_system_zone, - node); - break; - } + else /* Unexpected overlap of system zones. */ + return -EFSCORRUPTED; } - if (!new_entry) { - new_entry = kmem_cache_alloc(ext4_system_zone_cachep, - GFP_KERNEL); - if (!new_entry) - return -ENOMEM; - new_entry->start_blk = start_blk; - new_entry->count = count; - new_node = &new_entry->node; + new_entry = kmem_cache_alloc(ext4_system_zone_cachep, + GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->start_blk = start_blk; + new_entry->count = count; + new_node = &new_entry->node; - rb_link_node(new_node, parent, n); - rb_insert_color(new_node, &system_blks->root); - } + rb_link_node(new_node, parent, n); + rb_insert_color(new_node, &system_blks->root); /* Can we merge to the left? */ node = rb_prev(new_node); From ce9f24cccdc019229b70a5c15e2b09ad9c0ab5d1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 28 Jul 2020 15:04:34 +0200 Subject: [PATCH 045/290] ext4: check journal inode extents more carefully Currently, system zones just track ranges of block, that are "important" fs metadata (bitmaps, group descriptors, journal blocks, etc.). This however complicates how extent tree (or indirect blocks) can be checked for inodes that actually track such metadata - currently the journal inode but arguably we should be treating quota files or resize inode similarly. We cannot run __ext4_ext_check() on such metadata inodes when loading their extents as that would immediately trigger the validity checks and so we just hack around that and special-case the journal inode. This however leads to a situation that a journal inode which has extent tree of depth at least one can have invalid extent tree that gets unnoticed until ext4_cache_extents() crashes. To overcome this limitation, track inode number each system zone belongs to (0 is used for zones not belonging to any inode). We can then verify inode number matches the expected one when verifying extent tree and thus avoid the false errors. With this there's no need to to special-case journal inode during extent tree checking anymore so remove it. Fixes: 0a944e8a6c66 ("ext4: don't perform block validity checks on the journal inode") Reported-by: Wolfgang Frisch Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200728130437.7804-4-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 51 ++++++++++++++++++++-------------------- fs/ext4/ext4.h | 6 ++--- fs/ext4/extents.c | 16 +++++-------- fs/ext4/indirect.c | 6 ++--- fs/ext4/inode.c | 5 ++-- fs/ext4/mballoc.c | 4 ++-- 6 files changed, 41 insertions(+), 47 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index b394a50ebbe3..e830a9d4e10d 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -24,6 +24,7 @@ struct ext4_system_zone { struct rb_node node; ext4_fsblk_t start_blk; unsigned int count; + u32 ino; }; static struct kmem_cache *ext4_system_zone_cachep; @@ -45,7 +46,8 @@ void ext4_exit_system_zone(void) static inline int can_merge(struct ext4_system_zone *entry1, struct ext4_system_zone *entry2) { - if ((entry1->start_blk + entry1->count) == entry2->start_blk) + if ((entry1->start_blk + entry1->count) == entry2->start_blk && + entry1->ino == entry2->ino) return 1; return 0; } @@ -66,7 +68,7 @@ static void release_system_zone(struct ext4_system_blocks *system_blks) */ static int add_system_zone(struct ext4_system_blocks *system_blks, ext4_fsblk_t start_blk, - unsigned int count) + unsigned int count, u32 ino) { struct ext4_system_zone *new_entry, *entry; struct rb_node **n = &system_blks->root.rb_node, *node; @@ -89,6 +91,7 @@ static int add_system_zone(struct ext4_system_blocks *system_blks, return -ENOMEM; new_entry->start_blk = start_blk; new_entry->count = count; + new_entry->ino = ino; new_node = &new_entry->node; rb_link_node(new_node, parent, n); @@ -149,7 +152,7 @@ static void debug_print_tree(struct ext4_sb_info *sbi) static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, struct ext4_system_blocks *system_blks, ext4_fsblk_t start_blk, - unsigned int count) + unsigned int count, ino_t ino) { struct ext4_system_zone *entry; struct rb_node *n; @@ -170,7 +173,7 @@ static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, else if (start_blk >= (entry->start_blk + entry->count)) n = n->rb_right; else - return 0; + return entry->ino == ino; } return 1; } @@ -204,19 +207,18 @@ static int ext4_protect_reserved_inode(struct super_block *sb, if (n == 0) { i++; } else { - if (!ext4_data_block_valid_rcu(sbi, system_blks, - map.m_pblk, n)) { - err = -EFSCORRUPTED; - __ext4_error(sb, __func__, __LINE__, -err, - map.m_pblk, "blocks %llu-%llu " - "from inode %u overlap system zone", - map.m_pblk, - map.m_pblk + map.m_len - 1, ino); + err = add_system_zone(system_blks, map.m_pblk, n, ino); + if (err < 0) { + if (err == -EFSCORRUPTED) { + __ext4_error(sb, __func__, __LINE__, + -err, map.m_pblk, + "blocks %llu-%llu from inode %u overlap system zone", + map.m_pblk, + map.m_pblk + map.m_len - 1, + ino); + } break; } - err = add_system_zone(system_blks, map.m_pblk, n); - if (err < 0) - break; i += n; } } @@ -270,19 +272,19 @@ int ext4_setup_system_zone(struct super_block *sb) ((i < 5) || ((i % flex_size) == 0))) add_system_zone(system_blks, ext4_group_first_block_no(sb, i), - ext4_bg_num_gdb(sb, i) + 1); + ext4_bg_num_gdb(sb, i) + 1, 0); gdp = ext4_get_group_desc(sb, i, NULL); ret = add_system_zone(system_blks, - ext4_block_bitmap(sb, gdp), 1); + ext4_block_bitmap(sb, gdp), 1, 0); if (ret) goto err; ret = add_system_zone(system_blks, - ext4_inode_bitmap(sb, gdp), 1); + ext4_inode_bitmap(sb, gdp), 1, 0); if (ret) goto err; ret = add_system_zone(system_blks, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group); + sbi->s_itb_per_group, 0); if (ret) goto err; } @@ -331,7 +333,7 @@ void ext4_release_system_zone(struct super_block *sb) call_rcu(&system_blks->rcu, ext4_destroy_system_zone); } -int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, +int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, unsigned int count) { struct ext4_system_blocks *system_blks; @@ -343,9 +345,9 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, * mount option. */ rcu_read_lock(); - system_blks = rcu_dereference(sbi->system_blks); - ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk, - count); + system_blks = rcu_dereference(EXT4_SB(inode->i_sb)->system_blks); + ret = ext4_data_block_valid_rcu(EXT4_SB(inode->i_sb), system_blks, + start_blk, count, inode->i_ino); rcu_read_unlock(); return ret; } @@ -364,8 +366,7 @@ int ext4_check_blockref(const char *function, unsigned int line, while (bref < p+max) { blk = le32_to_cpu(*bref++); if (blk && - unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), - blk, 1))) { + unlikely(!ext4_inode_block_valid(inode, blk, 1))) { ext4_error_inode(inode, function, line, blk, "invalid block"); return -EFSCORRUPTED; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c73b9a1959a9..3994f012a9de 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3397,9 +3397,9 @@ extern void ext4_release_system_zone(struct super_block *sb); extern int ext4_setup_system_zone(struct super_block *sb); extern int __init ext4_init_system_zone(void); extern void ext4_exit_system_zone(void); -extern int ext4_data_block_valid(struct ext4_sb_info *sbi, - ext4_fsblk_t start_blk, - unsigned int count); +extern int ext4_inode_block_valid(struct inode *inode, + ext4_fsblk_t start_blk, + unsigned int count); extern int ext4_check_blockref(const char *, unsigned int, struct inode *, __le32 *, unsigned int); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4af3f36c8351..0eea09aa0f26 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -340,7 +340,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) */ if (lblock + len <= lblock) return 0; - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); + return ext4_inode_block_valid(inode, block, len); } static int ext4_valid_extent_idx(struct inode *inode, @@ -348,7 +348,7 @@ static int ext4_valid_extent_idx(struct inode *inode, { ext4_fsblk_t block = ext4_idx_pblock(ext_idx); - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); + return ext4_inode_block_valid(inode, block, 1); } static int ext4_valid_extent_entries(struct inode *inode, @@ -507,14 +507,10 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - if (!ext4_has_feature_journal(inode->i_sb) || - (inode->i_ino != - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); - if (err) - goto errout; - } + err = __ext4_ext_check(function, line, inode, + ext_block_hdr(bh), depth, pblk); + if (err) + goto errout; set_buffer_verified(bh); /* * If this is a leaf block, cache all of its entries diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index be2b66eb65f7..402641825712 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -858,8 +858,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, else if (ext4_should_journal_data(inode)) flags |= EXT4_FREE_BLOCKS_FORGET; - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, - count)) { + if (!ext4_inode_block_valid(inode, block_to_free, count)) { EXT4_ERROR_INODE(inode, "attempt to clear invalid " "blocks %llu len %lu", (unsigned long long) block_to_free, count); @@ -1004,8 +1003,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (!nr) continue; /* A hole */ - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), - nr, 1)) { + if (!ext4_inode_block_valid(inode, nr, 1)) { EXT4_ERROR_INODE(inode, "invalid indirect mapped " "block %lu (level %d)", diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 551a1056870d..0b07576af3bf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -394,8 +394,7 @@ static int __check_block_validity(struct inode *inode, const char *func, (inode->i_ino == le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) return 0; - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, - map->m_len)) { + if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, "lblock %lu mapped to illegal pblock %llu " "(length %d)", (unsigned long) map->m_lblk, @@ -4761,7 +4760,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ret = 0; if (ei->i_file_acl && - !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { + !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) { ext4_error_inode(inode, function, line, 0, "iget: bad extended attribute block %llu", ei->i_file_acl); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 19101941dadd..8b8c84db243a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3233,7 +3233,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); - if (!ext4_data_block_valid(sbi, block, len)) { + if (!ext4_inode_block_valid(ac->ac_inode, block, len)) { ext4_error(sb, "Allocating blocks %llu-%llu which overlap " "fs metadata", block, block+len); /* File system mounted not to panic on error @@ -5058,7 +5058,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && - !ext4_data_block_valid(sbi, block, count)) { + !ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks not in datazone - " "block = %llu, count = %lu", block, count); goto error_return; From 3f67e7cffa6ee2d49ccb697c569c6d30e6024160 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 28 Jul 2020 15:04:35 +0200 Subject: [PATCH 046/290] ext4: fold ext4_data_block_valid_rcu() into the caller After the previous patch, ext4_data_block_valid_rcu() has a single caller. Fold it into it. Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200728130437.7804-5-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 69 ++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index e830a9d4e10d..9c40214f31f9 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -144,40 +144,6 @@ static void debug_print_tree(struct ext4_sb_info *sbi) printk(KERN_CONT "\n"); } -/* - * Returns 1 if the passed-in block region (start_blk, - * start_blk+count) is valid; 0 if some part of the block region - * overlaps with filesystem metadata blocks. - */ -static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, - struct ext4_system_blocks *system_blks, - ext4_fsblk_t start_blk, - unsigned int count, ino_t ino) -{ - struct ext4_system_zone *entry; - struct rb_node *n; - - if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || - (start_blk + count < start_blk) || - (start_blk + count > ext4_blocks_count(sbi->s_es))) - return 0; - - if (system_blks == NULL) - return 1; - - n = system_blks->root.rb_node; - while (n) { - entry = rb_entry(n, struct ext4_system_zone, node); - if (start_blk + count - 1 < entry->start_blk) - n = n->rb_left; - else if (start_blk >= (entry->start_blk + entry->count)) - n = n->rb_right; - else - return entry->ino == ino; - } - return 1; -} - static int ext4_protect_reserved_inode(struct super_block *sb, struct ext4_system_blocks *system_blks, u32 ino) @@ -333,11 +299,24 @@ void ext4_release_system_zone(struct super_block *sb) call_rcu(&system_blks->rcu, ext4_destroy_system_zone); } +/* + * Returns 1 if the passed-in block region (start_blk, + * start_blk+count) is valid; 0 if some part of the block region + * overlaps with some other filesystem metadata blocks. + */ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, unsigned int count) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_system_blocks *system_blks; - int ret; + struct ext4_system_zone *entry; + struct rb_node *n; + int ret = 1; + + if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (start_blk + count < start_blk) || + (start_blk + count > ext4_blocks_count(sbi->s_es))) + return 0; /* * Lock the system zone to prevent it being released concurrently @@ -345,9 +324,23 @@ int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, * mount option. */ rcu_read_lock(); - system_blks = rcu_dereference(EXT4_SB(inode->i_sb)->system_blks); - ret = ext4_data_block_valid_rcu(EXT4_SB(inode->i_sb), system_blks, - start_blk, count, inode->i_ino); + system_blks = rcu_dereference(sbi->system_blks); + if (system_blks == NULL) + goto out_rcu; + + n = system_blks->root.rb_node; + while (n) { + entry = rb_entry(n, struct ext4_system_zone, node); + if (start_blk + count - 1 < entry->start_blk) + n = n->rb_left; + else if (start_blk >= (entry->start_blk + entry->count)) + n = n->rb_right; + else { + ret = (entry->ino == inode->i_ino); + break; + } + } +out_rcu: rcu_read_unlock(); return ret; } From e7bfb5c9bb3d63cb2abb3ceaf1a429d9f02f942d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 28 Jul 2020 15:04:36 +0200 Subject: [PATCH 047/290] ext4: handle add_system_zone() failure in ext4_setup_system_zone() There's one place that fails to handle error from add_system_zone() call and thus we can fail to protect superblock and group-descriptor blocks properly in case of ENOMEM. Fix it. Reported-by: Lukas Czerner Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200728130437.7804-6-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 9c40214f31f9..2d008c1b58f2 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -235,10 +235,13 @@ int ext4_setup_system_zone(struct super_block *sb) for (i=0; i < ngroups; i++) { cond_resched(); if (ext4_bg_has_super(sb, i) && - ((i < 5) || ((i % flex_size) == 0))) - add_system_zone(system_blks, + ((i < 5) || ((i % flex_size) == 0))) { + ret = add_system_zone(system_blks, ext4_group_first_block_no(sb, i), ext4_bg_num_gdb(sb, i) + 1, 0); + if (ret) + goto err; + } gdp = ext4_get_group_desc(sb, i, NULL); ret = add_system_zone(system_blks, ext4_block_bitmap(sb, gdp), 1, 0); From 0f5bde1db174f6c471f0bd27198575719dabe3e5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 28 Jul 2020 15:04:37 +0200 Subject: [PATCH 048/290] ext4: correctly restore system zone info when remount fails When remounting filesystem fails late during remount handling and block_validity mount option is also changed during the remount, we fail to restore system zone information to a state matching the mount option. This is mostly harmless, just the block validity checking will not match the situation described by the mount option. Make sure these two are always consistent. Reported-by: Lukas Czerner Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200728130437.7804-7-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 8 -------- fs/ext4/super.c | 29 +++++++++++++++++++++-------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 2d008c1b58f2..c54ba52f2dd4 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -220,14 +220,6 @@ int ext4_setup_system_zone(struct super_block *sb) int flex_size = ext4_flex_bg_size(sbi); int ret; - if (!test_opt(sb, BLOCK_VALIDITY)) { - if (sbi->system_blks) - ext4_release_system_zone(sb); - return 0; - } - if (sbi->system_blks) - return 0; - system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL); if (!system_blks) return -ENOMEM; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6507e30e58b5..0892d71a52bd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4727,11 +4727,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_set_resv_clusters(sb); - err = ext4_setup_system_zone(sb); - if (err) { - ext4_msg(sb, KERN_ERR, "failed to initialize system " - "zone (%d)", err); - goto failed_mount4a; + if (test_opt(sb, BLOCK_VALIDITY)) { + err = ext4_setup_system_zone(sb); + if (err) { + ext4_msg(sb, KERN_ERR, "failed to initialize system " + "zone (%d)", err); + goto failed_mount4a; + } } ext4_ext_init(sb); @@ -5747,9 +5749,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_register_li_request(sb, first_not_zeroed); } - err = ext4_setup_system_zone(sb); - if (err) - goto restore_opts; + /* + * Handle creation of system zone data early because it can fail. + * Releasing of existing data is done when we are sure remount will + * succeed. + */ + if (test_opt(sb, BLOCK_VALIDITY) && !sbi->system_blks) { + err = ext4_setup_system_zone(sb); + if (err) + goto restore_opts; + } if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) { err = ext4_commit_super(sb, 1); @@ -5771,6 +5780,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } } #endif + if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks) + ext4_release_system_zone(sb); /* * Some options can be enabled by ext4 and/or by VFS mount flag @@ -5792,6 +5803,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) sbi->s_commit_interval = old_opts.s_commit_interval; sbi->s_min_batch_time = old_opts.s_min_batch_time; sbi->s_max_batch_time = old_opts.s_max_batch_time; + if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks) + ext4_release_system_zone(sb); #ifdef CONFIG_QUOTA sbi->s_jquota_fmt = old_opts.s_jquota_fmt; for (i = 0; i < EXT4_MAXQUOTAS; i++) { From 377254b2cd2252c7c3151b113cbdf93a7736c2e9 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Fri, 31 Jul 2020 12:10:25 -0400 Subject: [PATCH 049/290] fs: prevent BUG_ON in submit_bh_wbc() If a device is hot-removed --- for example, when a physical device is unplugged from pcie slot or a nbd device's network is shutdown --- this can result in a BUG_ON() crash in submit_bh_wbc(). This is because the when the block device dies, the buffer heads will have their Buffer_Mapped flag get cleared, leading to the crash in submit_bh_wbc. We had attempted to work around this problem in commit a17712c8 ("ext4: check superblock mapped prior to committing"). Unfortunately, it's still possible to hit the BUG_ON(!buffer_mapped(bh)) if the device dies between when the work-around check in ext4_commit_super() and when submit_bh_wbh() is finally called: Code path: ext4_commit_super judge if 'buffer_mapped(sbh)' is false, return <== commit a17712c8 lock_buffer(sbh) ... unlock_buffer(sbh) __sync_dirty_buffer(sbh,... lock_buffer(sbh) judge if 'buffer_mapped(sbh))' is false, return <== added by this patch submit_bh(...,sbh) submit_bh_wbc(...,sbh,...) [100722.966497] kernel BUG at fs/buffer.c:3095! <== BUG_ON(!buffer_mapped(bh))' in submit_bh_wbc() [100722.966503] invalid opcode: 0000 [#1] SMP [100722.966566] task: ffff8817e15a9e40 task.stack: ffffc90024744000 [100722.966574] RIP: 0010:submit_bh_wbc+0x180/0x190 [100722.966575] RSP: 0018:ffffc90024747a90 EFLAGS: 00010246 [100722.966576] RAX: 0000000000620005 RBX: ffff8818a80603a8 RCX: 0000000000000000 [100722.966576] RDX: ffff8818a80603a8 RSI: 0000000000020800 RDI: 0000000000000001 [100722.966577] RBP: ffffc90024747ac0 R08: 0000000000000000 R09: ffff88207f94170d [100722.966578] R10: 00000000000437c8 R11: 0000000000000001 R12: 0000000000020800 [100722.966578] R13: 0000000000000001 R14: 000000000bf9a438 R15: ffff88195f333000 [100722.966580] FS: 00007fa2eee27700(0000) GS:ffff88203d840000(0000) knlGS:0000000000000000 [100722.966580] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [100722.966581] CR2: 0000000000f0b008 CR3: 000000201a622003 CR4: 00000000007606e0 [100722.966582] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [100722.966583] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [100722.966583] PKRU: 55555554 [100722.966583] Call Trace: [100722.966588] __sync_dirty_buffer+0x6e/0xd0 [100722.966614] ext4_commit_super+0x1d8/0x290 [ext4] [100722.966626] __ext4_std_error+0x78/0x100 [ext4] [100722.966635] ? __ext4_journal_get_write_access+0xca/0x120 [ext4] [100722.966646] ext4_reserve_inode_write+0x58/0xb0 [ext4] [100722.966655] ? ext4_dirty_inode+0x48/0x70 [ext4] [100722.966663] ext4_mark_inode_dirty+0x53/0x1e0 [ext4] [100722.966671] ? __ext4_journal_start_sb+0x6d/0xf0 [ext4] [100722.966679] ext4_dirty_inode+0x48/0x70 [ext4] [100722.966682] __mark_inode_dirty+0x17f/0x350 [100722.966686] generic_update_time+0x87/0xd0 [100722.966687] touch_atime+0xa9/0xd0 [100722.966690] generic_file_read_iter+0xa09/0xcd0 [100722.966694] ? page_cache_tree_insert+0xb0/0xb0 [100722.966704] ext4_file_read_iter+0x4a/0x100 [ext4] [100722.966707] ? __inode_security_revalidate+0x4f/0x60 [100722.966709] __vfs_read+0xec/0x160 [100722.966711] vfs_read+0x8c/0x130 [100722.966712] SyS_pread64+0x87/0xb0 [100722.966716] do_syscall_64+0x67/0x1b0 [100722.966719] entry_SYSCALL64_slow_path+0x25/0x25 To address this, add the check of 'buffer_mapped(bh)' to __sync_dirty_buffer(). This also has the benefit of fixing this for other file systems. With this addition, we can drop the workaround in ext4_commit_supper(). [ Commit description rewritten by tytso. ] Signed-off-by: Xianting Tian Link: https://lore.kernel.org/r/1596211825-8750-1-git-send-email-xianting_tian@126.com Signed-off-by: Theodore Ts'o --- fs/buffer.c | 9 +++++++++ fs/ext4/super.c | 7 ------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 64fe82ec65ff..75a8849abb5d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3160,6 +3160,15 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags) WARN_ON(atomic_read(&bh->b_count) < 1); lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { + /* + * The bh should be mapped, but it might not be if the + * device was hot-removed. Not much we can do but fail the I/O. + */ + if (!buffer_mapped(bh)) { + unlock_buffer(bh); + return -EIO; + } + get_bh(bh); bh->b_end_io = end_buffer_write_sync; ret = submit_bh(REQ_OP_WRITE, op_flags, bh); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0892d71a52bd..daa94c7f7271 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5235,13 +5235,6 @@ static int ext4_commit_super(struct super_block *sb, int sync) if (!sbh || block_device_ejected(sb)) return error; - /* - * The superblock bh should be mapped, but it might not be if the - * device was hot-removed. Not much we can do but fail the I/O. - */ - if (!buffer_mapped(sbh)) - return error; - /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock From 7303cb5bfe845f7d43cd9b2dbd37dbb266efda9b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 31 Jul 2020 18:21:35 +0200 Subject: [PATCH 050/290] ext4: fix checking of directory entry validity for inline directories ext4_search_dir() and ext4_generic_delete_entry() can be called both for standard director blocks and for inline directories stored inside inode or inline xattr space. For the second case we didn't call ext4_check_dir_entry() with proper constraints that could result in accepting corrupted directory entry as well as false positive filesystem errors like: EXT4-fs error (device dm-0): ext4_search_dir:1395: inode #28320400: block 113246792: comm dockerd: bad entry in directory: directory entry too close to block end - offset=0, inode=28320403, rec_len=32, name_len=8, size=4096 Fix the arguments passed to ext4_check_dir_entry(). Fixes: 109ba779d6cc ("ext4: check for directory entries too close to block end") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200731162135.8080-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cb2eb1967e73..b92571beab72 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1396,8 +1396,8 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, ext4_match(dir, fname, de)) { /* found a match - just to be sure, do * a full check */ - if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, - bh->b_size, offset)) + if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf, + buf_size, offset)) return -1; *res_dir = de; return 1; @@ -2482,7 +2482,7 @@ int ext4_generic_delete_entry(handle_t *handle, de = (struct ext4_dir_entry_2 *)entry_buf; while (i < buf_size - csum_size) { if (ext4_check_dir_entry(dir, NULL, de, bh, - bh->b_data, bh->b_size, i)) + entry_buf, buf_size, i)) return -EFSCORRUPTED; if (de == de_del) { if (pde) From ea7dc097826b06a9746a2e74c2d6e78d35c98088 Mon Sep 17 00:00:00 2001 From: Ravulapati Vishnu vardhan rao Date: Fri, 7 Aug 2020 21:40:17 +0530 Subject: [PATCH 051/290] ASoC: amd: Replacing component->name with codec_dai->name. Replacing string compare with "codec_dai->name" instead of comparing with "codec_dai->component->name" in hw_params because, Here the component name for codec RT1015 is "i2c-10EC5682:00" and will never be "rt1015-aif1" as it is codec-dai->name. So, strcmp() always compares and fails to set the sysclk,pll,bratio for expected codec-dai="rt1015-aif1". Signed-off-by: Ravulapati Vishnu vardhan rao Link: https://lore.kernel.org/r/20200807161046.17932-1-Vishnuvardhanrao.Ravulapati@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp3x-rt5682-max9836.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp3x-rt5682-max9836.c b/sound/soc/amd/acp3x-rt5682-max9836.c index 55815fdaa1aa..406526e79af3 100644 --- a/sound/soc/amd/acp3x-rt5682-max9836.c +++ b/sound/soc/amd/acp3x-rt5682-max9836.c @@ -138,7 +138,7 @@ static int acp3x_1015_hw_params(struct snd_pcm_substream *substream, srate = params_rate(params); for_each_rtd_codec_dais(rtd, i, codec_dai) { - if (strcmp(codec_dai->component->name, "rt1015-aif")) + if (strcmp(codec_dai->name, "rt1015-aif")) continue; ret = snd_soc_dai_set_bclk_ratio(codec_dai, 64); if (ret < 0) From 135dd873d3c76d812ae64c668adef3f2c59ed27f Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Mon, 10 Aug 2020 09:12:34 +0200 Subject: [PATCH 052/290] spi: stm32h7: fix race condition at end of transfer The caller of stm32_spi_transfer_one(), spi_transfer_one_message(), is waiting for us to call spi_finalize_current_transfer() and will eventually schedule a new transfer, if available. We should guarantee that the spi controller is really available before calling spi_finalize_current_transfer(). Move the call to spi_finalize_current_transfer() _after_ the call to stm32_spi_disable(). Signed-off-by: Antonio Borneo Signed-off-by: Alain Volmat Link: https://lore.kernel.org/r/1597043558-29668-2-git-send-email-alain.volmat@st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 4a21feae0103..814a3ec3b8ad 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -971,8 +971,8 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) spin_unlock_irqrestore(&spi->lock, flags); if (end) { - spi_finalize_current_transfer(master); stm32h7_spi_disable(spi); + spi_finalize_current_transfer(master); } return IRQ_HANDLED; From 3373e9004acc0603242622b4378c64bc01d21b5f Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 10 Aug 2020 09:12:35 +0200 Subject: [PATCH 053/290] spi: stm32: fix fifo threshold level in case of short transfer When transfer is shorter than half of the fifo, set the data packet size up to transfer size instead of up to half of the fifo. Check also that threshold is set at least to 1 data frame. Signed-off-by: Amelie Delaunay Signed-off-by: Alain Volmat Link: https://lore.kernel.org/r/1597043558-29668-3-git-send-email-alain.volmat@st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 814a3ec3b8ad..e5450233f3f8 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -467,20 +467,27 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz, /** * stm32h7_spi_prepare_fthlv - Determine FIFO threshold level * @spi: pointer to the spi controller data structure + * @xfer_len: length of the message to be transferred */ -static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi) +static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi, u32 xfer_len) { - u32 fthlv, half_fifo; + u32 fthlv, half_fifo, packet; /* data packet should not exceed 1/2 of fifo space */ half_fifo = (spi->fifo_size / 2); - if (spi->cur_bpw <= 8) - fthlv = half_fifo; - else if (spi->cur_bpw <= 16) - fthlv = half_fifo / 2; + /* data_packet should not exceed transfer length */ + if (half_fifo > xfer_len) + packet = xfer_len; else - fthlv = half_fifo / 4; + packet = half_fifo; + + if (spi->cur_bpw <= 8) + fthlv = packet; + else if (spi->cur_bpw <= 16) + fthlv = packet / 2; + else + fthlv = packet / 4; /* align packet size with data registers access */ if (spi->cur_bpw > 8) @@ -488,6 +495,9 @@ static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi) else fthlv -= (fthlv % 4); /* multiple of 4 */ + if (!fthlv) + fthlv = 1; + return fthlv; } @@ -1393,7 +1403,7 @@ static void stm32h7_spi_set_bpw(struct stm32_spi *spi) cfg1_setb |= (bpw << STM32H7_SPI_CFG1_DSIZE_SHIFT) & STM32H7_SPI_CFG1_DSIZE; - spi->cur_fthlv = stm32h7_spi_prepare_fthlv(spi); + spi->cur_fthlv = stm32h7_spi_prepare_fthlv(spi, spi->cur_xferlen); fthlv = spi->cur_fthlv - 1; cfg1_clrb |= STM32H7_SPI_CFG1_FTHLV; @@ -1588,6 +1598,8 @@ static int stm32_spi_transfer_one_setup(struct stm32_spi *spi, spin_lock_irqsave(&spi->lock, flags); + spi->cur_xferlen = transfer->len; + if (spi->cur_bpw != transfer->bits_per_word) { spi->cur_bpw = transfer->bits_per_word; spi->cfg->set_bpw(spi); @@ -1635,8 +1647,6 @@ static int stm32_spi_transfer_one_setup(struct stm32_spi *spi, goto out; } - spi->cur_xferlen = transfer->len; - dev_dbg(spi->dev, "transfer communication mode set to %d\n", spi->cur_comm); dev_dbg(spi->dev, From 9cc61973bf9385b19ff5dda4a2a7e265fcba85e4 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 10 Aug 2020 09:12:36 +0200 Subject: [PATCH 054/290] spi: stm32: fix stm32_spi_prepare_mbr in case of odd clk_rate Fix spi->clk_rate when it is odd to the nearest lowest even value because minimum SPI divider is 2. Signed-off-by: Amelie Delaunay Signed-off-by: Alain Volmat Link: https://lore.kernel.org/r/1597043558-29668-4-git-send-email-alain.volmat@st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index e5450233f3f8..571dea72bf7e 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -441,7 +441,8 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz, { u32 div, mbrdiv; - div = DIV_ROUND_UP(spi->clk_rate, speed_hz); + /* Ensure spi->clk_rate is even */ + div = DIV_ROUND_UP(spi->clk_rate & ~0x1, speed_hz); /* * SPI framework set xfer->speed_hz to master->max_speed_hz if From db96bf976a4fc65439be0b4524c0d41427d98814 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 10 Aug 2020 09:12:37 +0200 Subject: [PATCH 055/290] spi: stm32: fixes suspend/resume management This patch adds pinctrl power management, and reconfigure spi controller in case of resume. Fixes: 038ac869c9d2 ("spi: stm32: add runtime PM support") Signed-off-by: Amelie Delaunay Signed-off-by: Alain Volmat Link: https://lore.kernel.org/r/1597043558-29668-5-git-send-email-alain.volmat@st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 571dea72bf7e..9b90a22543fd 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -2007,6 +2008,8 @@ static int stm32_spi_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); + pinctrl_pm_select_sleep_state(&pdev->dev); + return 0; } @@ -2018,13 +2021,18 @@ static int stm32_spi_runtime_suspend(struct device *dev) clk_disable_unprepare(spi->clk); - return 0; + return pinctrl_pm_select_sleep_state(dev); } static int stm32_spi_runtime_resume(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct stm32_spi *spi = spi_master_get_devdata(master); + int ret; + + ret = pinctrl_pm_select_default_state(dev); + if (ret) + return ret; return clk_prepare_enable(spi->clk); } @@ -2054,10 +2062,23 @@ static int stm32_spi_resume(struct device *dev) return ret; ret = spi_master_resume(master); - if (ret) + if (ret) { clk_disable_unprepare(spi->clk); + return ret; + } - return ret; + ret = pm_runtime_get_sync(dev); + if (ret) { + dev_err(dev, "Unable to power device:%d\n", ret); + return ret; + } + + spi->cfg->config(spi); + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return 0; } #endif From 60ccb3515fc61a0124c70aa37317f75b67560024 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 10 Aug 2020 09:12:38 +0200 Subject: [PATCH 056/290] spi: stm32: always perform registers configuration prior to transfer SPI registers content may have been lost upon suspend/resume sequence. So, always compute and apply the necessary configuration in stm32_spi_transfer_one_setup routine. Signed-off-by: Alain Volmat Link: https://lore.kernel.org/r/1597043558-29668-6-git-send-email-alain.volmat@st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 42 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 9b90a22543fd..d4b33b358a31 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -1597,41 +1597,33 @@ static int stm32_spi_transfer_one_setup(struct stm32_spi *spi, unsigned long flags; unsigned int comm_type; int nb_words, ret = 0; + int mbr; spin_lock_irqsave(&spi->lock, flags); spi->cur_xferlen = transfer->len; - if (spi->cur_bpw != transfer->bits_per_word) { - spi->cur_bpw = transfer->bits_per_word; - spi->cfg->set_bpw(spi); + spi->cur_bpw = transfer->bits_per_word; + spi->cfg->set_bpw(spi); + + /* Update spi->cur_speed with real clock speed */ + mbr = stm32_spi_prepare_mbr(spi, transfer->speed_hz, + spi->cfg->baud_rate_div_min, + spi->cfg->baud_rate_div_max); + if (mbr < 0) { + ret = mbr; + goto out; } - if (spi->cur_speed != transfer->speed_hz) { - int mbr; - - /* Update spi->cur_speed with real clock speed */ - mbr = stm32_spi_prepare_mbr(spi, transfer->speed_hz, - spi->cfg->baud_rate_div_min, - spi->cfg->baud_rate_div_max); - if (mbr < 0) { - ret = mbr; - goto out; - } - - transfer->speed_hz = spi->cur_speed; - stm32_spi_set_mbr(spi, mbr); - } + transfer->speed_hz = spi->cur_speed; + stm32_spi_set_mbr(spi, mbr); comm_type = stm32_spi_communication_type(spi_dev, transfer); - if (spi->cur_comm != comm_type) { - ret = spi->cfg->set_mode(spi, comm_type); + ret = spi->cfg->set_mode(spi, comm_type); + if (ret < 0) + goto out; - if (ret < 0) - goto out; - - spi->cur_comm = comm_type; - } + spi->cur_comm = comm_type; if (spi->cfg->set_data_idleness) spi->cfg->set_data_idleness(spi, transfer->len); From 0454357faaf8eea07af4f01a9e7afde175377b41 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 7 Aug 2020 14:10:57 +0200 Subject: [PATCH 057/290] dt-bindings: lpspi: Add missing boolean type for fsl,spi-only-use-cs1-sel When running "make dt_binding_check" (even if restricted to an unrelated binding document using DT_SCHEMA_FILES=...): Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml: ignoring, error in schema: properties: fsl,spi-only-use-cs1-sel warning: no schema found in file: Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml Fix this by adding a proper type definition for the vendor-specific fsl,spi-only-use-cs1-sel property. Fixes: 7ac9bbf6ab3085c2 ("dt-bindings: lpspi: New property in document DT bindings for LPSPI") Suggested-by: Rob Herring Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200807121057.14204-1-geert+renesas@glider.be Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml index 22882e769e26..312d8fee9dbb 100644 --- a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml +++ b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml @@ -39,6 +39,7 @@ properties: spi common code does not support use of CS signals discontinuously. i.MX8DXL-EVK board only uses CS1 without using CS0. Therefore, add this property to re-config the chipselect value in the LPSPI driver. + type: boolean required: - compatible From 78484d7c747e30468b35bd5f19edf602f50162a7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 9 Aug 2020 22:34:06 +0200 Subject: [PATCH 058/290] drm: amdgpu: Use the correct size when allocating memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When '*sgt' is allocated, we must allocated 'sizeof(**sgt)' bytes instead of 'sizeof(*sg)'. The sizeof(*sg) is bigger than sizeof(**sgt) so this wastes memory but it won't lead to corruption. Fixes: f44ffd677fb3 ("drm/amdgpu: add support for exporting VRAM using DMA-buf v3") Reviewed-by: Christian König Signed-off-by: Christophe JAILLET Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 134cc36e30c5..0739e259bf91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -462,7 +462,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, unsigned int pages; int i, r; - *sgt = kmalloc(sizeof(*sg), GFP_KERNEL); + *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL); if (!*sgt) return -ENOMEM; From d5bbb4761c9ffbe8e3a547b74ec26fb6ce31bf32 Mon Sep 17 00:00:00 2001 From: Liu ChengZhe Date: Thu, 6 Aug 2020 14:54:08 +0800 Subject: [PATCH 059/290] drm/amdgpu: Skip some registers config for SRIOV Some registers are not accessible to virtual function setup, so skip their initialization when in VF-SRIOV mode. v2: move SRIOV VF check into specify functions; modify commit description and comment. Signed-off-by: Liu ChengZhe Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index fa0bca3e1f73..5d2505956f84 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -135,6 +135,12 @@ static void gfxhub_v2_1_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + /* Setup L2 cache */ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -190,6 +196,12 @@ static void gfxhub_v2_1_enable_system_domain(struct amdgpu_device *adev) static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev) { + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0xFFFFFFFF); WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, @@ -326,6 +338,13 @@ void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 757fa8e83f5b..c79fc54bc3c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -134,6 +134,12 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -189,6 +195,12 @@ static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) { + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0xFFFFFFFF); @@ -318,6 +330,13 @@ void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); From 266d81d9eed30f4994d76a2b237c63ece062eefe Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 7 Aug 2020 15:03:40 +0800 Subject: [PATCH 060/290] drm/amd/powerplay: correct Vega20 cached smu feature state Correct the cached smu feature state on pp_features sysfs setting. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 3b8839641770..e9676ebdba63 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -979,10 +979,7 @@ static int vega20_disable_all_smu_features(struct pp_hwmgr *hwmgr) { struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); - uint64_t features_enabled; - int i; - bool enabled; - int ret = 0; + int i, ret = 0; PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableAllSmuFeatures, @@ -990,17 +987,8 @@ static int vega20_disable_all_smu_features(struct pp_hwmgr *hwmgr) "[DisableAllSMUFeatures] Failed to disable all smu features!", return ret); - ret = vega20_get_enabled_smc_features(hwmgr, &features_enabled); - PP_ASSERT_WITH_CODE(!ret, - "[DisableAllSMUFeatures] Failed to get enabled smc features!", - return ret); - - for (i = 0; i < GNLD_FEATURES_MAX; i++) { - enabled = (features_enabled & data->smu_features[i].smu_feature_bitmap) ? - true : false; - data->smu_features[i].enabled = enabled; - data->smu_features[i].supported = enabled; - } + for (i = 0; i < GNLD_FEATURES_MAX; i++) + data->smu_features[i].enabled = 0; return 0; } @@ -3230,10 +3218,11 @@ static int vega20_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) static int vega20_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfeature_masks) { - uint64_t features_enabled; - uint64_t features_to_enable; - uint64_t features_to_disable; - int ret = 0; + struct vega20_hwmgr *data = + (struct vega20_hwmgr *)(hwmgr->backend); + uint64_t features_enabled, features_to_enable, features_to_disable; + int i, ret = 0; + bool enabled; if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX)) return -EINVAL; @@ -3262,6 +3251,17 @@ static int vega20_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfe return ret; } + /* Update the cached feature enablement state */ + ret = vega20_get_enabled_smc_features(hwmgr, &features_enabled); + if (ret) + return ret; + + for (i = 0; i < GNLD_FEATURES_MAX; i++) { + enabled = (features_enabled & data->smu_features[i].smu_feature_bitmap) ? + true : false; + data->smu_features[i].enabled = enabled; + } + return 0; } From 2c5b8080d810d98e3e59617680218499b17c84a1 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 7 Aug 2020 17:01:47 +0800 Subject: [PATCH 061/290] drm/amd/powerplay: correct UVD/VCE PG state on custom pptable uploading The UVD/VCE PG state is managed by UVD and VCE IP. It's error-prone to assume the bootup state in SMU based on the dpm status. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index e9676ebdba63..ea70d736f6a8 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -1640,12 +1640,6 @@ static void vega20_init_powergate_state(struct pp_hwmgr *hwmgr) data->uvd_power_gated = true; data->vce_power_gated = true; - - if (data->smu_features[GNLD_DPM_UVD].enabled) - data->uvd_power_gated = false; - - if (data->smu_features[GNLD_DPM_VCE].enabled) - data->vce_power_gated = false; } static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr) From 97a9b60fa3bf29d29d0fc253a98096c02fb8106e Mon Sep 17 00:00:00 2001 From: "shiwu.zhang" Date: Fri, 7 Aug 2020 16:43:59 +0800 Subject: [PATCH 062/290] drm/amdgpu: update gc golden register for arcturus Update golden setting to improve performance on HPC and ML apps Signed-off-by: shiwu.zhang Tested-by: gang.long Reviewed-by: guchun.chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index cb9d60a4e05e..b95f22262a90 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -691,6 +691,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000) }; static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = { From a49f6727e14caff32419cc3002b9ae9cafb750d7 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 27 Jul 2020 21:21:16 -0400 Subject: [PATCH 063/290] drm/amd/display: Fix incorrect backlight register offset for DCN [Why] Typo in backlight refactor inctroduced wrong register offset. [How] Change DCE to DCN register map for PWRSEQ_REF_DIV Cc: stable@vger.kernel.org Signed-off-by: Aric Cyr Reviewed-by: Ashley Thomas Acked-by: Qingqing Zhuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h index 70ec691e14d2..99c68ca9c7e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h @@ -49,7 +49,7 @@ #define DCN_PANEL_CNTL_REG_LIST()\ DCN_PANEL_CNTL_SR(PWRSEQ_CNTL, LVTMA), \ DCN_PANEL_CNTL_SR(PWRSEQ_STATE, LVTMA), \ - DCE_PANEL_CNTL_SR(PWRSEQ_REF_DIV, LVTMA), \ + DCN_PANEL_CNTL_SR(PWRSEQ_REF_DIV, LVTMA), \ SR(BL_PWM_CNTL), \ SR(BL_PWM_CNTL2), \ SR(BL_PWM_PERIOD_CNTL), \ From e4ed4dbbc8383d42a197da8fe7ca6434b0f14def Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Wed, 29 Jul 2020 17:33:27 -0400 Subject: [PATCH 064/290] drm/amd/display: Fix LFC multiplier changing erratically [Why] 1. There is a calculation that is using frame_time_in_us instead of last_render_time_in_us to calculate whether choosing an LFC multiplier would cause the inserted frame duration to be outside of range. 2. We do not handle unsigned integer subtraction correctly and it underflows to a really large value, which causes some logic errors. [How] 1. Fix logic to calculate 'within range' using last_render_time_in_us 2. Split out delta_from_mid_point_delta_in_us calculation to ensure we don't underflow and wrap around Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Qingqing Zhuo Signed-off-by: Alex Deucher --- .../amd/display/modules/freesync/freesync.c | 40 ++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 81820f3d6b3b..d988533d4af5 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -324,22 +324,44 @@ static void apply_below_the_range(struct core_freesync *core_freesync, /* Choose number of frames to insert based on how close it * can get to the mid point of the variable range. + * - Delta for CEIL: delta_from_mid_point_in_us_1 + * - Delta for FLOOR: delta_from_mid_point_in_us_2 */ - if ((frame_time_in_us / mid_point_frames_ceil) > in_out_vrr->min_duration_in_us && - (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2 || - mid_point_frames_floor < 2)) { - frames_to_insert = mid_point_frames_ceil; - delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_2 - - delta_from_mid_point_in_us_1; - } else { + if ((last_render_time_in_us / mid_point_frames_ceil) < in_out_vrr->min_duration_in_us) { + /* Check for out of range. + * If using CEIL produces a value that is out of range, + * then we are forced to use FLOOR. + */ + frames_to_insert = mid_point_frames_floor; + } else if (mid_point_frames_floor < 2) { + /* Check if FLOOR would result in non-LFC. In this case + * choose to use CEIL + */ + frames_to_insert = mid_point_frames_ceil; + } else if (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2) { + /* If choosing CEIL results in a frame duration that is + * closer to the mid point of the range. + * Choose CEIL + */ + frames_to_insert = mid_point_frames_ceil; + } else { + /* If choosing FLOOR results in a frame duration that is + * closer to the mid point of the range. + * Choose FLOOR + */ frames_to_insert = mid_point_frames_floor; - delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_1 - - delta_from_mid_point_in_us_2; } /* Prefer current frame multiplier when BTR is enabled unless it drifts * too far from the midpoint */ + if (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2) { + delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_2 - + delta_from_mid_point_in_us_1; + } else { + delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_1 - + delta_from_mid_point_in_us_2; + } if (in_out_vrr->btr.frames_to_insert != 0 && delta_from_mid_point_delta_in_us < BTR_DRIFT_MARGIN) { if (((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) < From abba907c7a20032c2d504fd5afe3af7d440a09d0 Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Wed, 29 Jul 2020 17:43:10 -0400 Subject: [PATCH 065/290] drm/amd/display: Switch to immediate mode for updating infopackets [Why] Using FRAME_UPDATE will result in infopacket to be potentially updated one frame late. In commit stream scenarios for previously active stream, some stale infopacket data from previous config might be erroneously sent out on initial frame after stream is re-enabled. [How] Switch to using IMMEDIATE_UPDATE mode Signed-off-by: Anthony Koo Reviewed-by: Ashley Thomas Acked-by: Qingqing Zhuo Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn10/dcn10_stream_encoder.c | 16 ++++++++-------- .../amd/display/dc/dcn10/dcn10_stream_encoder.h | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 07b2f9399671..842abb4c475b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -121,35 +121,35 @@ void enc1_update_generic_info_packet( switch (packet_index) { case 0: REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, - AFMT_GENERIC0_FRAME_UPDATE, 1); + AFMT_GENERIC0_IMMEDIATE_UPDATE, 1); break; case 1: REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, - AFMT_GENERIC1_FRAME_UPDATE, 1); + AFMT_GENERIC1_IMMEDIATE_UPDATE, 1); break; case 2: REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, - AFMT_GENERIC2_FRAME_UPDATE, 1); + AFMT_GENERIC2_IMMEDIATE_UPDATE, 1); break; case 3: REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, - AFMT_GENERIC3_FRAME_UPDATE, 1); + AFMT_GENERIC3_IMMEDIATE_UPDATE, 1); break; case 4: REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, - AFMT_GENERIC4_FRAME_UPDATE, 1); + AFMT_GENERIC4_IMMEDIATE_UPDATE, 1); break; case 5: REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, - AFMT_GENERIC5_FRAME_UPDATE, 1); + AFMT_GENERIC5_IMMEDIATE_UPDATE, 1); break; case 6: REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, - AFMT_GENERIC6_FRAME_UPDATE, 1); + AFMT_GENERIC6_IMMEDIATE_UPDATE, 1); break; case 7: REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, - AFMT_GENERIC7_FRAME_UPDATE, 1); + AFMT_GENERIC7_IMMEDIATE_UPDATE, 1); break; default: break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index ed385b1477be..30eae7459d50 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -281,7 +281,14 @@ struct dcn10_stream_enc_registers { SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC0_IMMEDIATE_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC1_IMMEDIATE_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_IMMEDIATE_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_IMMEDIATE_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_IMMEDIATE_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_IMMEDIATE_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_IMMEDIATE_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\ @@ -345,7 +352,14 @@ struct dcn10_stream_enc_registers { type AFMT_GENERIC2_FRAME_UPDATE;\ type AFMT_GENERIC3_FRAME_UPDATE;\ type AFMT_GENERIC4_FRAME_UPDATE;\ + type AFMT_GENERIC0_IMMEDIATE_UPDATE;\ + type AFMT_GENERIC1_IMMEDIATE_UPDATE;\ + type AFMT_GENERIC2_IMMEDIATE_UPDATE;\ + type AFMT_GENERIC3_IMMEDIATE_UPDATE;\ type AFMT_GENERIC4_IMMEDIATE_UPDATE;\ + type AFMT_GENERIC5_IMMEDIATE_UPDATE;\ + type AFMT_GENERIC6_IMMEDIATE_UPDATE;\ + type AFMT_GENERIC7_IMMEDIATE_UPDATE;\ type AFMT_GENERIC5_FRAME_UPDATE;\ type AFMT_GENERIC6_FRAME_UPDATE;\ type AFMT_GENERIC7_FRAME_UPDATE;\ From b24bdc37d03a0478189e20a50286092840f414fa Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Tue, 28 Jul 2020 15:10:35 +0800 Subject: [PATCH 066/290] drm/amd/display: Fix EDID parsing after resume from suspend [Why] Resuming from suspend, CEA blocks from EDID are not parsed and no video modes can support YUV420. When this happens, output bpc cannot go over 8-bit with 4K modes on HDMI. [How] In amdgpu_dm_update_connector_after_detect(), drm_add_edid_modes() is called after drm_connector_update_edid_property() to fully parse EDID and update display info. Cc: stable@vger.kernel.org Signed-off-by: Stylon Wang Reviewed-by: Nicholas Kazlauskas Acked-by: Qingqing Zhuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e6833a2d01f8..39444619d9b5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2196,6 +2196,7 @@ void amdgpu_dm_update_connector_after_detect( drm_connector_update_edid_property(connector, aconnector->edid); + drm_add_edid_modes(connector, aconnector->edid); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, From 79940e4d10df9c737a394630968471c632246ee0 Mon Sep 17 00:00:00 2001 From: Jaehyun Chung Date: Thu, 30 Jul 2020 16:31:29 -0400 Subject: [PATCH 067/290] drm/amd/display: Blank stream before destroying HDCP session [Why] Stream disable sequence incorretly destroys HDCP session while stream is not blanked and while audio is not muted. This sequence causes a flash of corruption during mode change and an audio click. [How] Change sequence to blank stream before destroying HDCP session. Audio will also be muted by blanking the stream. Cc: stable@vger.kernel.org Signed-off-by: Jaehyun Chung Reviewed-by: Alvin Lee Acked-by: Qingqing Zhuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 4bd6e03a7ef3..117d8aaf2a9b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3286,12 +3286,11 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx) core_link_set_avmute(pipe_ctx, true); } + dc->hwss.blank_stream(pipe_ctx); #if defined(CONFIG_DRM_AMD_DC_HDCP) update_psp_stream_config(pipe_ctx, true); #endif - dc->hwss.blank_stream(pipe_ctx); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); From f41ed88cbd6f025f7a683a11a74f901555fba11c Mon Sep 17 00:00:00 2001 From: Daniel Kolesa Date: Sat, 8 Aug 2020 22:42:35 +0200 Subject: [PATCH 068/290] drm/amdgpu/display: use GFP_ATOMIC in dcn20_validate_bandwidth_internal GFP_KERNEL may and will sleep, and this is being executed in a non-preemptible context; this will mess things up since it's called inbetween DC_FP_START/END, and rescheduling will result in the DC_FP_END later being called in a different context (or just crashing if any floating point/vector registers/instructions are used after the call is resumed in a different context). Signed-off-by: Daniel Kolesa Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 790baf552695..9140b3fc767a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3141,7 +3141,7 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co int vlevel = 0; int pipe_split_from[MAX_PIPES]; int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC); DC_LOGGER_INIT(dc->ctx->logger); BW_VAL_TRACE_COUNT(); From efc913c8fb88728626759735e1b09370a6813180 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 10 Aug 2020 15:46:31 +0200 Subject: [PATCH 069/290] ASoC: Make soc_component_read() returning an error code again Along with the recent unification of snd_soc_component_read*() functions, the behavior of snd_soc_component_read() was changed slightly; namely it returns the register read value directly, and even if an error happens, it returns zero (but it prints an error message). That said, the caller side can't know whether it's an error or not any longer. Ideally this shouldn't matter much, but in practice this seems causing a regression, as John reported. And, grepping the tree revealed that there are still plenty of callers that do check the error code, so we'll need to deal with them in anyway. As a quick band-aid over the regression, this patch changes the return value of snd_soc_component_read() again to the negative error code. It can't work, obviously, for 32bit register values, but it should be enough for the known regressions, so far. Fixes: cf6e26c71bfd ("ASoC: soc-component: merge snd_soc_component_read() and snd_soc_component_read32()") Reported-by: John Stultz Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200810134631.19742-1-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/soc-component.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index f0b4f4bc44a4..5504b92946e3 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -406,7 +406,7 @@ static unsigned int soc_component_read_no_lock( ret = -EIO; if (ret < 0) - soc_component_ret(component, ret); + return soc_component_ret(component, ret); return val; } From 56235e4bc5ae58cb8fcd9314dba4e9ab077ddda8 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 11 Aug 2020 13:02:04 +0100 Subject: [PATCH 070/290] ASoC: q6afe-dai: mark all widgets registers as SND_SOC_NOPM Looks like the q6afe-dai dapm widget registers are set as "0", which is a not correct. As this registers will be read by ASoC core during startup which will throw up errors, Fix this by making the registers as SND_SOC_NOPM as these should be never used. With recent changes to ASoC core, every register read/write failures are reported very verbosely. Prior to this fails to reads are totally ignored, so we never saw any error messages. Fixes: 24c4cbcfac09 ("ASoC: qdsp6: q6afe: Add q6afe dai driver") Reported-by: John Stultz Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20200811120205.21805-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6afe-dai.c | 210 +++++++++++++++---------------- 1 file changed, 105 insertions(+), 105 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6afe-dai.c b/sound/soc/qcom/qdsp6/q6afe-dai.c index 2a5302f1db98..0168af849272 100644 --- a/sound/soc/qcom/qdsp6/q6afe-dai.c +++ b/sound/soc/qcom/qdsp6/q6afe-dai.c @@ -1150,206 +1150,206 @@ static int q6afe_of_xlate_dai_name(struct snd_soc_component *component, } static const struct snd_soc_dapm_widget q6afe_dai_widgets[] = { - SND_SOC_DAPM_AIF_IN("HDMI_RX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("SLIMBUS_0_RX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("SLIMBUS_1_RX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("SLIMBUS_2_RX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("SLIMBUS_3_RX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("SLIMBUS_4_RX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("SLIMBUS_5_RX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("SLIMBUS_6_RX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("SLIMBUS_0_TX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("SLIMBUS_1_TX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("SLIMBUS_2_TX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("SLIMBUS_3_TX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("SLIMBUS_4_TX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("SLIMBUS_5_TX", NULL, 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("SLIMBUS_6_TX", NULL, 0, 0, 0, 0), + SND_SOC_DAPM_AIF_IN("HDMI_RX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("SLIMBUS_0_RX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("SLIMBUS_1_RX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("SLIMBUS_2_RX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("SLIMBUS_3_RX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("SLIMBUS_4_RX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("SLIMBUS_5_RX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("SLIMBUS_6_RX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("SLIMBUS_0_TX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("SLIMBUS_1_TX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("SLIMBUS_2_TX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("SLIMBUS_3_TX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("SLIMBUS_4_TX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("SLIMBUS_5_TX", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("SLIMBUS_6_TX", NULL, 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUAT_MI2S_RX", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUAT_MI2S_TX", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("TERT_MI2S_RX", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("TERT_MI2S_TX", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_MI2S_RX", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("SEC_MI2S_TX", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_MI2S_RX_SD1", "Secondary MI2S Playback SD1", - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("PRI_MI2S_RX", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("PRI_MI2S_TX", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_7", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_7", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_7", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_7", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_7", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_7", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_7", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_7", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_7", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_0", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_1", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_2", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_3", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_4", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_5", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_6", NULL, - 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_7", NULL, - 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("DISPLAY_PORT_RX", "NULL", 0, 0, 0, 0), + 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("DISPLAY_PORT_RX", "NULL", 0, SND_SOC_NOPM, 0, 0), }; static const struct snd_soc_component_driver q6afe_dai_component = { From 796a58fe2b8c9b6668db00d92512ec84be663027 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 11 Aug 2020 13:02:05 +0100 Subject: [PATCH 071/290] ASoC: q6routing: add dummy register read/write function Most of the DAPM widgets for DSP ASoC components reuse reg field of the widgets for its internal calculations, however these are not real registers. So read/writes to these numbers are not really valid. However ASoC core will read these registers to get default state during startup. With recent changes to ASoC core, every register read/write failures are reported very verbosely. Prior to this fails to reads are totally ignored, so we never saw any error messages. To fix this add dummy read/write function to return default value. Fixes: e3a33673e845 ("ASoC: qdsp6: q6routing: Add q6routing driver") Reported-by: John Stultz Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20200811120205.21805-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6routing.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c index eaa95b5a7b66..25d23e0266c7 100644 --- a/sound/soc/qcom/qdsp6/q6routing.c +++ b/sound/soc/qcom/qdsp6/q6routing.c @@ -973,6 +973,20 @@ static int msm_routing_probe(struct snd_soc_component *c) return 0; } +static unsigned int q6routing_reg_read(struct snd_soc_component *component, + unsigned int reg) +{ + /* default value */ + return 0; +} + +static int q6routing_reg_write(struct snd_soc_component *component, + unsigned int reg, unsigned int val) +{ + /* dummy */ + return 0; +} + static const struct snd_soc_component_driver msm_soc_routing_component = { .probe = msm_routing_probe, .name = DRV_NAME, @@ -981,6 +995,8 @@ static const struct snd_soc_component_driver msm_soc_routing_component = { .num_dapm_widgets = ARRAY_SIZE(msm_qdsp6_widgets), .dapm_routes = intercon, .num_dapm_routes = ARRAY_SIZE(intercon), + .read = q6routing_reg_read, + .write = q6routing_reg_write, }; static int q6pcm_routing_probe(struct platform_device *pdev) From f70fff83cda63bbf596f99edc131b9daaba07458 Mon Sep 17 00:00:00 2001 From: Mike Pozulp Date: Thu, 13 Aug 2020 21:53:44 -0700 Subject: [PATCH 072/290] ALSA: hda/realtek: Add quirk for Samsung Galaxy Flex Book The Flex Book uses the same ALC298 codec as other Samsung laptops which have the no headphone sound bug, like my Samsung Notebook. The Flex Book owner used Early Patching to confirm that this quirk fixes the bug. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207423 Signed-off-by: Mike Pozulp Cc: Link: https://lore.kernel.org/r/20200814045346.645367-1-pozulp.kernel@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7f9d35273734..167b6fd6842d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7694,6 +7694,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), From 9420139f516d7fbc248ce17f35275cb005ed98ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Aug 2020 12:26:24 +0200 Subject: [PATCH 073/290] dma-pool: fix coherent pool allocations for IOMMU mappings When allocating coherent pool memory for an IOMMU mapping we don't care about the DMA mask. Move the guess for the initial GFP mask into the dma_direct_alloc_pages and pass dma_coherent_ok as a function pointer argument so that it doesn't get applied to the IOMMU case. Signed-off-by: Christoph Hellwig Tested-by: Amit Pundir --- drivers/iommu/dma-iommu.c | 4 +- include/linux/dma-direct.h | 3 - include/linux/dma-mapping.h | 5 +- kernel/dma/direct.c | 13 ++-- kernel/dma/pool.c | 116 +++++++++++++++--------------------- 5 files changed, 63 insertions(+), 78 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4959f5df21bd..5141d49a046b 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1035,8 +1035,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !gfpflags_allow_blocking(gfp) && !coherent) - cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, - gfp); + page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr, + gfp, NULL); else cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs); if (!cpu_addr) diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 5a3ce2a24794..6e87225600ae 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -73,9 +73,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, } u64 dma_direct_get_required_mask(struct device *dev); -gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, - u64 *phys_mask); -bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 016b96b384bd..52635e91143b 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -522,8 +522,9 @@ void *dma_common_pages_remap(struct page **pages, size_t size, pgprot_t prot, const void *caller); void dma_common_free_remap(void *cpu_addr, size_t size); -void *dma_alloc_from_pool(struct device *dev, size_t size, - struct page **ret_page, gfp_t flags); +struct page *dma_alloc_from_pool(struct device *dev, size_t size, + void **cpu_addr, gfp_t flags, + bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)); bool dma_free_from_pool(struct device *dev, void *start, size_t size); int diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index bb0041e99659..db6ef07aec3b 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -43,7 +43,7 @@ u64 dma_direct_get_required_mask(struct device *dev) return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; } -gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, +static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, u64 *phys_limit) { u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); @@ -68,7 +68,7 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, return 0; } -bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { return phys_to_dma_direct(dev, phys) + size - 1 <= min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); @@ -161,8 +161,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, size = PAGE_ALIGN(size); if (dma_should_alloc_from_pool(dev, gfp, attrs)) { - ret = dma_alloc_from_pool(dev, size, &page, gfp); - if (!ret) + u64 phys_mask; + + gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, + &phys_mask); + page = dma_alloc_from_pool(dev, size, &ret, gfp, + dma_coherent_ok); + if (!page) return NULL; goto done; } diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 6bc74a2d5127..5d071d4a3cba 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -196,93 +196,75 @@ static int __init dma_atomic_pool_init(void) } postcore_initcall(dma_atomic_pool_init); -static inline struct gen_pool *dma_guess_pool_from_device(struct device *dev) +static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp) { - u64 phys_mask; - gfp_t gfp; - - gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, - &phys_mask); - if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA) + if (prev == NULL) { + if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) + return atomic_pool_dma32; + if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) + return atomic_pool_dma; + return atomic_pool_kernel; + } + if (prev == atomic_pool_kernel) + return atomic_pool_dma32 ? atomic_pool_dma32 : atomic_pool_dma; + if (prev == atomic_pool_dma32) return atomic_pool_dma; - if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32) - return atomic_pool_dma32; - return atomic_pool_kernel; -} - -static inline struct gen_pool *dma_get_safer_pool(struct gen_pool *bad_pool) -{ - if (bad_pool == atomic_pool_kernel) - return atomic_pool_dma32 ? : atomic_pool_dma; - - if (bad_pool == atomic_pool_dma32) - return atomic_pool_dma; - return NULL; } -static inline struct gen_pool *dma_guess_pool(struct device *dev, - struct gen_pool *bad_pool) +static struct page *__dma_alloc_from_pool(struct device *dev, size_t size, + struct gen_pool *pool, void **cpu_addr, + bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)) { - if (bad_pool) - return dma_get_safer_pool(bad_pool); - - return dma_guess_pool_from_device(dev); -} - -void *dma_alloc_from_pool(struct device *dev, size_t size, - struct page **ret_page, gfp_t flags) -{ - struct gen_pool *pool = NULL; - unsigned long val = 0; - void *ptr = NULL; + unsigned long addr; phys_addr_t phys; - while (1) { - pool = dma_guess_pool(dev, pool); - if (!pool) { - WARN(1, "Failed to get suitable pool for %s\n", - dev_name(dev)); - break; - } + addr = gen_pool_alloc(pool, size); + if (!addr) + return NULL; - val = gen_pool_alloc(pool, size); - if (!val) - continue; - - phys = gen_pool_virt_to_phys(pool, val); - if (dma_coherent_ok(dev, phys, size)) - break; - - gen_pool_free(pool, val, size); - val = 0; + phys = gen_pool_virt_to_phys(pool, addr); + if (phys_addr_ok && !phys_addr_ok(dev, phys, size)) { + gen_pool_free(pool, addr, size); + return NULL; } + if (gen_pool_avail(pool) < atomic_pool_size) + schedule_work(&atomic_pool_work); - if (val) { - *ret_page = pfn_to_page(__phys_to_pfn(phys)); - ptr = (void *)val; - memset(ptr, 0, size); + *cpu_addr = (void *)addr; + memset(*cpu_addr, 0, size); + return pfn_to_page(__phys_to_pfn(phys)); +} - if (gen_pool_avail(pool) < atomic_pool_size) - schedule_work(&atomic_pool_work); +struct page *dma_alloc_from_pool(struct device *dev, size_t size, + void **cpu_addr, gfp_t gfp, + bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)) +{ + struct gen_pool *pool = NULL; + struct page *page; + + while ((pool = dma_guess_pool(pool, gfp))) { + page = __dma_alloc_from_pool(dev, size, pool, cpu_addr, + phys_addr_ok); + if (page) + return page; } - return ptr; + WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev)); + return NULL; } bool dma_free_from_pool(struct device *dev, void *start, size_t size) { struct gen_pool *pool = NULL; - while (1) { - pool = dma_guess_pool(dev, pool); - if (!pool) - return false; - - if (gen_pool_has_addr(pool, (unsigned long)start, size)) { - gen_pool_free(pool, (unsigned long)start, size); - return true; - } + while ((pool = dma_guess_pool(pool, 0))) { + if (!gen_pool_has_addr(pool, (unsigned long)start, size)) + continue; + gen_pool_free(pool, (unsigned long)start, size); + return true; } + + return false; } From d7e673ec2c8e0ea39c4c70fc490d67d7fbda869d Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 14 Aug 2020 12:26:23 +0200 Subject: [PATCH 074/290] dma-pool: Only allocate from CMA when in same memory zone There is no guarantee to CMA's placement, so allocating a zone specific atomic pool from CMA might return memory from a completely different memory zone. To get around this double check CMA's placement before allocating from it. Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Christoph Hellwig --- kernel/dma/pool.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 5d071d4a3cba..06582b488e31 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -3,7 +3,9 @@ * Copyright (C) 2012 ARM Ltd. * Copyright (C) 2020 Google LLC */ +#include #include +#include #include #include #include @@ -55,6 +57,29 @@ static void dma_atomic_pool_size_add(gfp_t gfp, size_t size) pool_size_kernel += size; } +static bool cma_in_zone(gfp_t gfp) +{ + unsigned long size; + phys_addr_t end; + struct cma *cma; + + cma = dev_get_cma_area(NULL); + if (!cma) + return false; + + size = cma_get_size(cma); + if (!size) + return false; + + /* CMA can't cross zone boundaries, see cma_activate_area() */ + end = cma_get_base(cma) + size - 1; + if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) + return end <= DMA_BIT_MASK(zone_dma_bits); + if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) + return end <= DMA_BIT_MASK(32); + return true; +} + static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, gfp_t gfp) { @@ -68,7 +93,11 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, do { pool_size = 1 << (PAGE_SHIFT + order); - page = alloc_pages(gfp, order); + if (cma_in_zone(gfp)) + page = dma_alloc_from_contiguous(NULL, 1 << order, + order, false); + if (!page) + page = alloc_pages(gfp, order); } while (!page && order-- > 0); if (!page) goto out; From 470757f5b3a46bd85741bb0d8c1fd3f21048a2af Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Sat, 15 Aug 2020 03:21:03 +0300 Subject: [PATCH 075/290] ALSA: usb-audio: Add capture support for Saffire 6 (USB 1.1) Capture and playback endpoints on Saffire 6 (USB 1.1) resides on the same interface. This was not supported by the composite quirk back in the day when initial support for this device was added, thus only playback was enabled until now. Fixes: 11e424e88bd4 ("ALSA: usb-audio: Add support for Focusrite Saffire 6 USB") Signed-off-by: Alexander Tsoy Cc: Link: https://lore.kernel.org/r/20200815002103.29247-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index d79e3ddc5690..e6202608e043 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2678,6 +2678,10 @@ YAMAHA_DEVICE(0x7010, "UB99"), .ifnum = QUIRK_ANY_INTERFACE, .type = QUIRK_COMPOSITE, .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, { .ifnum = 0, .type = QUIRK_AUDIO_FIXED_ENDPOINT, @@ -2690,6 +2694,32 @@ YAMAHA_DEVICE(0x7010, "UB99"), .attributes = UAC_EP_CS_ATTR_SAMPLE_RATE, .endpoint = 0x01, .ep_attr = USB_ENDPOINT_XFER_ISOC, + .datainterval = 1, + .maxpacksize = 0x024c, + .rates = SNDRV_PCM_RATE_44100 | + SNDRV_PCM_RATE_48000, + .rate_min = 44100, + .rate_max = 48000, + .nr_rates = 2, + .rate_table = (unsigned int[]) { + 44100, 48000 + } + } + }, + { + .ifnum = 0, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3LE, + .channels = 2, + .iface = 0, + .altsetting = 1, + .altset_idx = 1, + .attributes = 0, + .endpoint = 0x82, + .ep_attr = USB_ENDPOINT_XFER_ISOC, + .datainterval = 1, + .maxpacksize = 0x0126, .rates = SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000, .rate_min = 44100, From 24633d901ea44fe99bc9a2d01a3881fa097b78b3 Mon Sep 17 00:00:00 2001 From: Vaibhav Shankar Date: Thu, 13 Aug 2020 19:22:34 -0700 Subject: [PATCH 076/290] perf/x86/intel/uncore: Add BW counters for GT, IA and IO breakdown Linux only has support to read total DDR reads and writes. Here we add support to enable bandwidth breakdown-GT, IA and IO. Breakdown of BW is important to debug and optimize memory access. This can also be used for telemetry and improving the system software.The offsets for GT, IA and IO are added and these free running counters can be accessed via MMIO space. The BW breakdown can be measured using the following cmd: perf stat -e uncore_imc/gt_requests/,uncore_imc/ia_requests/,uncore_imc/io_requests/ 30.57 MiB uncore_imc/gt_requests/ 1346.13 MiB uncore_imc/ia_requests/ 190.97 MiB uncore_imc/io_requests/ 5.984572733 seconds time elapsed BW/s = _requests/time elapsed Signed-off-by: Vaibhav Shankar Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200814022234.23605-1-vaibhav.shankar@intel.com --- arch/x86/events/intel/uncore_snb.c | 52 ++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index cb94ba86efd2..6a4ca27b2c9e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -390,6 +390,18 @@ static struct uncore_event_desc snb_uncore_imc_events[] = { INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(gt_requests, "event=0x03"), + INTEL_UNCORE_EVENT_DESC(gt_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(gt_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(ia_requests, "event=0x04"), + INTEL_UNCORE_EVENT_DESC(ia_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(ia_requests.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(io_requests, "event=0x05"), + INTEL_UNCORE_EVENT_DESC(io_requests.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(io_requests.unit, "MiB"), + { /* end: all zeroes */ }, }; @@ -405,13 +417,35 @@ static struct uncore_event_desc snb_uncore_imc_events[] = { #define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 #define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE +/* BW break down- legacy counters */ +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS 0x3 +#define SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE 0x5040 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS 0x4 +#define SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE 0x5044 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS 0x5 +#define SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE 0x5048 + enum perf_snb_uncore_imc_freerunning_types { - SNB_PCI_UNCORE_IMC_DATA = 0, + SNB_PCI_UNCORE_IMC_DATA_READS = 0, + SNB_PCI_UNCORE_IMC_DATA_WRITES, + SNB_PCI_UNCORE_IMC_GT_REQUESTS, + SNB_PCI_UNCORE_IMC_IA_REQUESTS, + SNB_PCI_UNCORE_IMC_IO_REQUESTS, + SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, }; static struct freerunning_counters snb_uncore_imc_freerunning[] = { - [SNB_PCI_UNCORE_IMC_DATA] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x4, 0x0, 2, 32 }, + [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IA_REQUESTS] = { SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, + [SNB_PCI_UNCORE_IMC_IO_REQUESTS] = { SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE, + 0x0, 0x0, 1, 32 }, }; static struct attribute *snb_uncore_imc_formats_attr[] = { @@ -525,6 +559,18 @@ static int snb_uncore_imc_event_init(struct perf_event *event) base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; idx = UNCORE_PMC_IDX_FREERUNNING; break; + case SNB_UNCORE_PCI_IMC_GT_REQUESTS: + base = SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IA_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IA_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; + case SNB_UNCORE_PCI_IMC_IO_REQUESTS: + base = SNB_UNCORE_PCI_IMC_IO_REQUESTS_BASE; + idx = UNCORE_PMC_IDX_FREERUNNING; + break; default: return -EINVAL; } @@ -598,7 +644,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = { static struct intel_uncore_type snb_uncore_imc = { .name = "imc", - .num_counters = 2, + .num_counters = 5, .num_boxes = 1, .num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, .mmio_map_size = SNB_UNCORE_PCI_IMC_MAP_SIZE, From f5d0f820ff8ae7349f9615e686cfc7b0e36211d9 Mon Sep 17 00:00:00 2001 From: Liang Wang Date: Sun, 16 Aug 2020 15:13:09 +0800 Subject: [PATCH 077/290] ALSA: isa: fix spelling mistakes in the comments Fix spelling mistakes in the comments: initailise ==> initialise tranfer ==> transfer Initialse ==> Initialise Signed-off-by: Liang Wang Link: https://lore.kernel.org/r/20200816071309.121461-1-wangliang101@huawei.com Signed-off-by: Takashi Iwai --- sound/isa/sscape.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c index 5363d88cc4b9..2e5a5c5279e8 100644 --- a/sound/isa/sscape.c +++ b/sound/isa/sscape.c @@ -308,7 +308,7 @@ static inline int verify_mpu401(const struct snd_mpu401 *mpu) } /* - * This is apparently the standard way to initailise an MPU-401 + * This is apparently the standard way to initialise an MPU-401 */ static inline void initialise_mpu401(const struct snd_mpu401 *mpu) { @@ -339,7 +339,7 @@ static void soundscape_free(struct snd_card *c) } /* - * Tell the SoundScape to begin a DMA tranfer using the given channel. + * Tell the SoundScape to begin a DMA transfer using the given channel. * All locking issues are left to the caller. */ static void sscape_start_dma_unsafe(unsigned io_base, enum GA_REG reg) @@ -803,7 +803,7 @@ static int mpu401_open(struct snd_mpu401 *mpu) } /* - * Initialse an MPU-401 subdevice for MIDI support on the SoundScape. + * Initialise an MPU-401 subdevice for MIDI support on the SoundScape. */ static int create_mpu401(struct snd_card *card, int devnum, unsigned long port, int irq) From b711d4eaf0c408a811311ee3e94d6e9e5a230a9a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 16 Aug 2020 08:23:05 -0700 Subject: [PATCH 078/290] io_uring: find and cancel head link async work on files exit Commit f254ac04c874 ("io_uring: enable lookup of links holding inflight files") only handled 2 out of the three head link cases we have, we also need to lookup and cancel work that is blocked in io-wq if that work has a link that's holding a reference to the files structure. Put the "cancel head links that hold this request pending" logic into io_attempt_cancel(), which will to through the motions of finding and canceling head links that hold the current inflight files stable request pending. Cc: stable@vger.kernel.org Reported-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index dc506b75659c..346a3eb84785 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8063,6 +8063,33 @@ static bool io_timeout_remove_link(struct io_ring_ctx *ctx, return found; } +static bool io_cancel_link_cb(struct io_wq_work *work, void *data) +{ + return io_match_link(container_of(work, struct io_kiocb, work), data); +} + +static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) +{ + enum io_wq_cancel cret; + + /* cancel this particular work, if it's running */ + cret = io_wq_cancel_work(ctx->io_wq, &req->work); + if (cret != IO_WQ_CANCEL_NOTFOUND) + return; + + /* find links that hold this pending, cancel those */ + cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_link_cb, req, true); + if (cret != IO_WQ_CANCEL_NOTFOUND) + return; + + /* if we have a poll link holding this pending, cancel that */ + if (io_poll_remove_link(ctx, req)) + return; + + /* final option, timeout link is holding this req pending */ + io_timeout_remove_link(ctx, req); +} + static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { @@ -8116,10 +8143,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, continue; } } else { - io_wq_cancel_work(ctx->io_wq, &cancel_req->work); - /* could be a link, check and remove if it is */ - if (!io_poll_remove_link(ctx, cancel_req)) - io_timeout_remove_link(ctx, cancel_req); + /* cancel this request, or head link requests */ + io_attempt_cancel(ctx, cancel_req); io_put_req(cancel_req); } From 3b2a4439e0ae1732f90877a7160bbf42e1beb4b6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 16 Aug 2020 10:58:43 -0700 Subject: [PATCH 079/290] io_uring: get rid of kiocb_wait_page_queue_init() The 5.9 merge moved this function io_uring, which means that we don't need to retain the generic nature of it. Clean up this part by removing redundant checks, and just inlining the small remainder in io_rw_should_retry(). No functional changes in this patch. Signed-off-by: Jens Axboe --- fs/io_uring.c | 41 +++++++++++------------------------------ 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 346a3eb84785..4b102d9ad846 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3074,27 +3074,6 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, return 1; } -static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb, - struct wait_page_queue *wait, - wait_queue_func_t func, - void *data) -{ - /* Can't support async wakeup with polled IO */ - if (kiocb->ki_flags & IOCB_HIPRI) - return -EINVAL; - if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) { - wait->wait.func = func; - wait->wait.private = data; - wait->wait.flags = 0; - INIT_LIST_HEAD(&wait->wait.entry); - kiocb->ki_flags |= IOCB_WAITQ; - kiocb->ki_waitq = wait; - return 0; - } - - return -EOPNOTSUPP; -} - /* * This controls whether a given IO request should be armed for async page * based retry. If we return false here, the request is handed to the async @@ -3109,16 +3088,17 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb, */ static bool io_rw_should_retry(struct io_kiocb *req) { + struct wait_page_queue *wait = &req->io->rw.wpq; struct kiocb *kiocb = &req->rw.kiocb; - int ret; /* never retry for NOWAIT, we just complete with -EAGAIN */ if (req->flags & REQ_F_NOWAIT) return false; /* Only for buffered IO */ - if (kiocb->ki_flags & IOCB_DIRECT) + if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) return false; + /* * just use poll if we can, and don't attempt if the fs doesn't * support callback based unlocks @@ -3126,14 +3106,15 @@ static bool io_rw_should_retry(struct io_kiocb *req) if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) return false; - ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq, - io_async_buf_func, req); - if (!ret) { - io_get_req_task(req); - return true; - } + wait->wait.func = io_async_buf_func; + wait->wait.private = req; + wait->wait.flags = 0; + INIT_LIST_HEAD(&wait->wait.entry); + kiocb->ki_flags |= IOCB_WAITQ; + kiocb->ki_waitq = wait; - return false; + io_get_req_task(req); + return true; } static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) From 030a2c689fb46e1690f7ded8b194bab7678efb28 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 3 Aug 2020 13:56:00 +1000 Subject: [PATCH 080/290] powerpc: Fix P10 PVR revision in /proc/cpuinfo for SMT4 cores On POWER10 bit 12 in the PVR indicates if the core is SMT4 or SMT8. Bit 12 is set for SMT4. Without this patch, /proc/cpuinfo on a SMT4 DD1 POWER10 looks like this: cpu : POWER10, altivec supported revision : 17.0 (pvr 0080 1100) Fixes: a3ea40d5c736 ("powerpc: Add POWER10 architected mode") Cc: stable@vger.kernel.org # v5.8 Signed-off-by: Michael Neuling Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200803035600.1820371-1-mikey@neuling.org --- arch/powerpc/kernel/setup-common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b198b0ff25bc..808ec9fab605 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -311,6 +311,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) min = pvr & 0xFF; break; case 0x004e: /* POWER9 bits 12-15 give chip type */ + case 0x0080: /* POWER10 bit 12 gives SMT8/4 */ maj = (pvr >> 8) & 0x0F; min = pvr & 0xFF; break; From 781fa4811d95314c1965c0c3337c9ac36ef26093 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Fri, 7 Aug 2020 06:05:00 -0400 Subject: [PATCH 081/290] powerpc/perf: Add support for outputting extended regs in perf intr_regs Add support for perf extended register capability in powerpc. The capability flag PERF_PMU_CAP_EXTENDED_REGS, is used to indicate the PMU which support extended registers. The generic code define the mask of extended registers as 0 for non supported architectures. Patch adds extended regs support for power9 platform by exposing MMCR0, MMCR1 and MMCR2 registers. REG_RESERVED mask needs update to include extended regs. PERF_REG_EXTENDED_MASK, contains mask value of the supported registers, is defined at runtime in the kernel based on platform since the supported registers may differ from one processor version to another and hence the MASK value. With the patch: available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 nip msr orig_r3 ctr link xer ccr softe trap dar dsisr sier mmcra mmcr0 mmcr1 mmcr2 PERF_RECORD_SAMPLE(IP, 0x1): 4784/4784: 0 period: 1 addr: 0 ... intr regs: mask 0xffffffffffff ABI 64-bit .... r0 0xc00000000012b77c .... r1 0xc000003fe5e03930 .... r2 0xc000000001b0e000 .... r3 0xc000003fdcddf800 .... r4 0xc000003fc7880000 .... r5 0x9c422724be .... r6 0xc000003fe5e03908 .... r7 0xffffff63bddc8706 .... r8 0x9e4 .... r9 0x0 .... r10 0x1 .... r11 0x0 .... r12 0xc0000000001299c0 .... r13 0xc000003ffffc4800 .... r14 0x0 .... r15 0x7fffdd8b8b00 .... r16 0x0 .... r17 0x7fffdd8be6b8 .... r18 0x7e7076607730 .... r19 0x2f .... r20 0xc00000001fc26c68 .... r21 0xc0002041e4227e00 .... r22 0xc00000002018fb60 .... r23 0x1 .... r24 0xc000003ffec4d900 .... r25 0x80000000 .... r26 0x0 .... r27 0x1 .... r28 0x1 .... r29 0xc000000001be1260 .... r30 0x6008010 .... r31 0xc000003ffebb7218 .... nip 0xc00000000012b910 .... msr 0x9000000000009033 .... orig_r3 0xc00000000012b86c .... ctr 0xc0000000001299c0 .... link 0xc00000000012b77c .... xer 0x0 .... ccr 0x28002222 .... softe 0x1 .... trap 0xf00 .... dar 0x0 .... dsisr 0x80000000000 .... sier 0x0 .... mmcra 0x80000000000 .... mmcr0 0x82008090 .... mmcr1 0x1e000000 .... mmcr2 0x0 ... thread: perf:4784 Signed-off-by: Anju T Sudhakar Signed-off-by: Athira Rajeev Tested-by: Nageswara R Sastry Reviewed-by: Madhavan Srinivasan Reviewed-by: Kajol Jain Reviewed-and-tested-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1596794701-23530-2-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/include/asm/perf_event.h | 3 ++ arch/powerpc/include/asm/perf_event_server.h | 5 +++ arch/powerpc/include/uapi/asm/perf_regs.h | 14 +++++++- arch/powerpc/perf/core-book3s.c | 1 + arch/powerpc/perf/perf_regs.c | 34 ++++++++++++++++++-- arch/powerpc/perf/power9-pmu.c | 6 ++++ 6 files changed, 59 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 1e8b2e1ec1db..daec64d41b44 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -40,4 +40,7 @@ static inline bool is_sier_available(void) { return false; } /* To support perf_regs sier update */ extern bool is_sier_available(void); +/* To define perf extended regs mask value */ +extern u64 PERF_REG_EXTENDED_MASK; +#define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK #endif diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 86c9eb064b22..f6acabb6c9be 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -62,6 +62,11 @@ struct power_pmu { int *blacklist_ev; /* BHRB entries in the PMU */ int bhrb_nr; + /* + * set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if + * the pmu supports extended perf regs capability + */ + int capabilities; }; /* diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index f599064dd8dc..225c64c56813 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -48,6 +48,18 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_DSISR, PERF_REG_POWERPC_SIER, PERF_REG_POWERPC_MMCRA, - PERF_REG_POWERPC_MAX, + /* Extended registers */ + PERF_REG_POWERPC_MMCR0, + PERF_REG_POWERPC_MMCR1, + PERF_REG_POWERPC_MMCR2, + /* Max regs without the extended regs */ + PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; + +#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) + +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ +#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) + +#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 78fe34986594..00038650a007 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2323,6 +2323,7 @@ int register_power_pmu(struct power_pmu *pmu) pmu->name); power_pmu.attr_groups = ppmu->attr_groups; + power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS); #ifdef MSR_HV /* diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index a213a0aa5d25..9301e6852d53 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -13,9 +13,11 @@ #include #include +u64 PERF_REG_EXTENDED_MASK; + #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) -#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1)) +#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK)) static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]), @@ -69,10 +71,26 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr), }; +/* Function to return the extended register values */ +static u64 get_ext_regs_value(int idx) +{ + switch (idx) { + case PERF_REG_POWERPC_MMCR0: + return mfspr(SPRN_MMCR0); + case PERF_REG_POWERPC_MMCR1: + return mfspr(SPRN_MMCR1); + case PERF_REG_POWERPC_MMCR2: + return mfspr(SPRN_MMCR2); + default: return 0; + } +} + u64 perf_reg_value(struct pt_regs *regs, int idx) { - if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX)) - return 0; + u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + perf_reg_extended_max = PERF_REG_MAX_ISA_300; if (idx == PERF_REG_POWERPC_SIER && (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) || @@ -85,6 +103,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) IS_ENABLED(CONFIG_PPC32))) return 0; + if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max) + return get_ext_regs_value(idx); + + /* + * If the idx is referring to value beyond the + * supported registers, return 0 with a warning + */ + if (WARN_ON_ONCE(idx >= perf_reg_extended_max)) + return 0; + return regs_get_register(regs, pt_regs_offset[idx]); } diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 05dae38b969a..2a57e93a79dc 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -90,6 +90,8 @@ enum { #define POWER9_MMCRA_IFM3 0x00000000C0000000UL #define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL +extern u64 PERF_REG_EXTENDED_MASK; + /* Nasty Power9 specific hack */ #define PVR_POWER9_CUMULUS 0x00002000 @@ -434,6 +436,7 @@ static struct power_pmu power9_pmu = { .cache_events = &power9_cache_events, .attr_groups = power9_pmu_attr_groups, .bhrb_nr = 32, + .capabilities = PERF_PMU_CAP_EXTENDED_REGS, }; int init_power9_pmu(void) @@ -457,6 +460,9 @@ int init_power9_pmu(void) } } + /* Set the PERF_REG_EXTENDED_MASK here */ + PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300; + rc = register_power_pmu(&power9_pmu); if (rc) return rc; From d735599a069f6936c1392e07075c34a19bda949a Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 7 Aug 2020 06:05:01 -0400 Subject: [PATCH 082/290] powerpc/perf: Add extended regs support for power10 platform Include capability flag PERF_PMU_CAP_EXTENDED_REGS for power10 and expose MMCR3, SIER2, SIER3 registers as part of extended regs. Also introduce PERF_REG_PMU_MASK_31 to define extended mask value at runtime for power10. Suggested-by: Ryan Grimm Signed-off-by: Athira Rajeev Tested-by: Nageswara R Sastry Reviewed-by: Kajol Jain Reviewed-and-tested-by: Ravi Bangoria Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1596794701-23530-3-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/include/uapi/asm/perf_regs.h | 6 ++++++ arch/powerpc/perf/perf_regs.c | 12 +++++++++++- arch/powerpc/perf/power10-pmu.c | 6 ++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index 225c64c56813..bdf5f10f8b9f 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -52,6 +52,9 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_MMCR0, PERF_REG_POWERPC_MMCR1, PERF_REG_POWERPC_MMCR2, + PERF_REG_POWERPC_MMCR3, + PERF_REG_POWERPC_SIER2, + PERF_REG_POWERPC_SIER3, /* Max regs without the extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; @@ -60,6 +63,9 @@ enum perf_event_powerpc_regs { /* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ #define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ +#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) #define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) +#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 9301e6852d53..8e53f2fc3fe0 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -81,6 +81,14 @@ static u64 get_ext_regs_value(int idx) return mfspr(SPRN_MMCR1); case PERF_REG_POWERPC_MMCR2: return mfspr(SPRN_MMCR2); +#ifdef CONFIG_PPC64 + case PERF_REG_POWERPC_MMCR3: + return mfspr(SPRN_MMCR3); + case PERF_REG_POWERPC_SIER2: + return mfspr(SPRN_SIER2); + case PERF_REG_POWERPC_SIER3: + return mfspr(SPRN_SIER3); +#endif default: return 0; } } @@ -89,7 +97,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) { u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX; - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + perf_reg_extended_max = PERF_REG_MAX_ISA_31; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) perf_reg_extended_max = PERF_REG_MAX_ISA_300; if (idx == PERF_REG_POWERPC_SIER && diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index f7cff7f36a1c..83148656b524 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -87,6 +87,8 @@ #define POWER10_MMCRA_IFM3 0x00000000C0000000UL #define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL +extern u64 PERF_REG_EXTENDED_MASK; + /* Table of alternatives, sorted by column 0 */ static const unsigned int power10_event_alternatives[][MAX_ALT] = { { PM_RUN_CYC_ALT, PM_RUN_CYC }, @@ -397,6 +399,7 @@ static struct power_pmu power10_pmu = { .cache_events = &power10_cache_events, .attr_groups = power10_pmu_attr_groups, .bhrb_nr = 32, + .capabilities = PERF_PMU_CAP_EXTENDED_REGS, }; int init_power10_pmu(void) @@ -408,6 +411,9 @@ int init_power10_pmu(void) strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) return -ENODEV; + /* Set the PERF_REG_EXTENDED_MASK here */ + PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31; + rc = register_power_pmu(&power10_pmu); if (rc) return rc; From 327da008e65a25b8206b36b7fc0c9e4edbb36a58 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 17 Aug 2020 06:26:18 +0530 Subject: [PATCH 083/290] powerpc: Add POWER10 raw mode cputable entry Add a raw mode cputable entry for POWER10. Copies most of the fields from commit a3ea40d5c736 ("powerpc: Add POWER10 architected mode") except for oprofile_cpu_type, machine_check_early, pvr_mask and pvr_mask fields. On bare metal systems we use DT CPU features, which doesn't need a cputable entry. But in VMs we still rely on the raw cputable entry to set the correct values for the PMU related fields. Fixes: a3ea40d5c736 ("powerpc: Add POWER10 architected mode") Signed-off-by: Madhavan Srinivasan [mpe: Reorder vs cleanup patch and add Fixes tag] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200817005618.3305028-2-maddy@linux.ibm.com --- arch/powerpc/kernel/cputable.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 3d406a9626e8..93b986660664 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -75,6 +75,7 @@ extern void __restore_cpu_power10(void); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); +extern long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -542,6 +543,25 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, + { /* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power10", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .machine_check_early = __machine_check_early_realmode_p10, + .platform = "power10", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, From 388692e943a58f28aac0fe83e75f5994da9ff8a1 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 17 Aug 2020 06:26:17 +0530 Subject: [PATCH 084/290] powerpc/kernel: Cleanup machine check function declarations __machine_check_early_realmode_p*() are currently declared as extern in cputable.c and because of this when compiled with "C=1" (which enables semantic checker) produces these warnings. CHECK arch/powerpc/kernel/mce_power.c arch/powerpc/kernel/mce_power.c:709:6: warning: symbol '__machine_check_early_realmode_p7' was not declared. Should it be static? arch/powerpc/kernel/mce_power.c:717:6: warning: symbol '__machine_check_early_realmode_p8' was not declared. Should it be static? arch/powerpc/kernel/mce_power.c:722:6: warning: symbol '__machine_check_early_realmode_p9' was not declared. Should it be static? arch/powerpc/kernel/mce_power.c:740:6: warning: symbol '__machine_check_early_realmode_p10' was not declared. Should it be static? Patch here moves the declaration to asm/mce.h and includes the same in cputable.c Fixes: ae744f3432d3 ("powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power8") Fixes: 7b9f71f974a1 ("powerpc/64s: POWER9 machine check handler") Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200817005618.3305028-1-maddy@linux.ibm.com --- arch/powerpc/include/asm/cputable.h | 5 +++++ arch/powerpc/include/asm/mce.h | 7 +++++++ arch/powerpc/kernel/cputable.c | 4 ---- arch/powerpc/kernel/dt_cpu_ftrs.c | 4 ---- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index fdddb822d564..e005b4581023 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -9,6 +9,11 @@ #ifndef __ASSEMBLY__ +/* + * Added to include __machine_check_early_realmode_* functions + */ +#include + /* This structure can grow, it's real size is used by head.S code * via the mkdefs mechanism. */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index adf2cda67f9a..89aa8248a57d 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -210,6 +210,9 @@ struct mce_error_info { #define MCE_EVENT_RELEASE true #define MCE_EVENT_DONTRELEASE false +struct pt_regs; +struct notifier_block; + extern void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr, uint64_t phys_addr); @@ -225,5 +228,9 @@ int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); +long __machine_check_early_realmode_p7(struct pt_regs *regs); +long __machine_check_early_realmode_p8(struct pt_regs *regs); +long __machine_check_early_realmode_p9(struct pt_regs *regs); +long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 93b986660664..2aa89c6b2896 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -72,10 +72,6 @@ extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power9(void); extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power10(void); -extern long __machine_check_early_realmode_p7(struct pt_regs *regs); -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); -extern long __machine_check_early_realmode_p10(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 6f8c0c6b937a..8dc46f38680b 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -64,10 +64,6 @@ struct dt_cpu_feature { * Set up the base CPU */ -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); -extern long __machine_check_early_realmode_p10(struct pt_regs *regs); - static int hv_mode; static struct { From 8979ef70850eb469e1094279259d1ef393ffe85f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 11 Aug 2020 14:28:36 -0700 Subject: [PATCH 085/290] opp: Put opp table in dev_pm_opp_set_rate() for empty tables We get the opp_table pointer at the top of the function and so we should put the pointer at the end of the function like all other exit paths from this function do. Cc: v5.7+ # v5.7+ Fixes: aca48b61f963 ("opp: Manage empty OPP tables with clk handle") Reviewed-by: Rajendra Nayak Signed-off-by: Stephen Boyd [ Viresh: Split the patch into two ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 9d7fb45b1786..f2f32786ee45 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -893,8 +893,10 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) * have OPP table for the device, while others don't and * opp_set_rate() just needs to behave like clk_set_rate(). */ - if (!_get_opp_count(opp_table)) - return 0; + if (!_get_opp_count(opp_table)) { + ret = 0; + goto put_opp_table; + } if (!opp_table->required_opp_tables && !opp_table->regulators && !opp_table->paths) { From d4ec88d205583ac4f9482cf3e89128589bd881d2 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 11 Aug 2020 14:28:36 -0700 Subject: [PATCH 086/290] opp: Put opp table in dev_pm_opp_set_rate() if _set_opp_bw() fails We get the opp_table pointer at the top of the function and so we should put the pointer at the end of the function like all other exit paths from this function do. Cc: v5.8+ # v5.8+ Fixes: b00e667a6d8b ("opp: Remove bandwidth votes when target_freq is zero") Reviewed-by: Rajendra Nayak Signed-off-by: Stephen Boyd [ Viresh: Split the patch into two ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index f2f32786ee45..bdb028c7793d 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -907,7 +907,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) ret = _set_opp_bw(opp_table, NULL, dev, true); if (ret) - return ret; + goto put_opp_table; if (opp_table->regulator_enabled) { regulator_disable(opp_table->regulators[0]); From 74a2a7de81a2ef20732ec02087314e92692a7a1b Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Sun, 16 Aug 2020 17:44:31 +0900 Subject: [PATCH 087/290] ALSA: usb-audio: Update documentation comment for MS2109 quirk As the recent fix addressed the channel swap problem more properly, update the comment as well. Fixes: 1b7ecc241a67 ("ALSA: usb-audio: work around streaming quirk for MacroSilicon MS2109") Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20200816084431.102151-1-marcan@marcan.st Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index e6202608e043..f4fb002e3ef4 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3744,8 +3744,8 @@ ALC1220_VB_DESKTOP(0x26ce, 0x0a01), /* Asrock TRX40 Creator */ * they pretend to be 96kHz mono as a workaround for stereo being broken * by that... * - * They also have swapped L-R channels, but that's for userspace to deal - * with. + * They also have an issue with initial stream alignment that causes the + * channels to be swapped and out of phase, which is dealt with in quirks.c. */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | From 23dc958689449be85e39351a8c809c3d344b155b Mon Sep 17 00:00:00 2001 From: Mike Pozulp Date: Sun, 16 Aug 2020 21:32:17 -0700 Subject: [PATCH 088/290] ALSA: hda/realtek: Add model alc298-samsung-headphone The very quiet and distorted headphone output bug that afflicted my Samsung Notebook 9 is appearing in many other Samsung laptops. Expose the quirk which fixed my laptop as a model so other users can try it. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207423 Signed-off-by: Mike Pozulp Link: https://lore.kernel.org/r/20200817043219.458889-1-pozulp.kernel@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 167b6fd6842d..5722f0bd3b31 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7956,6 +7956,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"}, {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"}, + {.id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc298-samsung-headphone"}, {} }; #define ALC225_STANDARD_PINS \ From 0b8eb2ee9da1e8c9b8082f404f3948aa82a057b2 Mon Sep 17 00:00:00 2001 From: Vineeth Vijayan Date: Thu, 18 Jun 2020 16:42:45 +0200 Subject: [PATCH 089/290] s390/cio: add cond_resched() in the slow_eval_known_fn() loop The scanning through subchannels during the time of an event could take significant amount of time in case of platforms with lots of known subchannels. This might result in higher scheduling latencies for other tasks especially on systems with a single CPU. Add cond_resched() call, as the loop in slow_eval_known_fn() can be executed for a longer duration. Reviewed-by: Peter Oberparleiter Signed-off-by: Vineeth Vijayan Signed-off-by: Heiko Carstens --- drivers/s390/cio/css.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 94edbb33d0d1..aca022239b33 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -677,6 +677,11 @@ static int slow_eval_known_fn(struct subchannel *sch, void *data) rc = css_evaluate_known_subchannel(sch, 1); if (rc == -EAGAIN) css_schedule_eval(sch->schid); + /* + * The loop might take long time for platforms with lots of + * known devices. Allow scheduling here. + */ + cond_resched(); } return 0; } From b76fee1bc56c31a9d2a49592810eba30cc06d61a Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 4 Aug 2020 13:01:26 +0200 Subject: [PATCH 090/290] s390/pci: ignore stale configuration request event A configuration request event may be stale, that is the event may reference a zdev which was already configured. This can happen when a hotplug happens during boot such that the device is discovered and configured in the initial clp_list_pci(), then after initialization we enable events and process the original configuration request which additionally still contains the old disabled function handle leading to a failure during device enablement and subsequent I/O lockout. Fix this by restoring the check that the device to be configured is in standby which was removed in commit f606b3ef47c9 ("s390/pci: adapt events for zbus"). This check does not need serialization as we only enable the events after zPCI has fully initialized, which includes the initial clp_list_pci(), rescan only does updates and events are serialized with respect to each other. Fixes: f606b3ef47c9 ("s390/pci: adapt events for zbus") Cc: # 5.8 Reported-by: Shalini Chellathurai Saroja Tested-by: Shalini Chellathurai Saroja Acked-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index fdebd286f402..4602068c7548 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -92,6 +92,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 1); break; } + /* the configuration request may be stale */ + if (zdev->state != ZPCI_FN_STATE_STANDBY) + break; zdev->fh = ccdf->fh; zdev->state = ZPCI_FN_STATE_CONFIGURED; ret = zpci_enable_device(zdev); From 9eaba29c7985236e16468f4e6a49cc18cf01443e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 12 Aug 2020 18:55:41 +0200 Subject: [PATCH 091/290] s390/runtime_instrumentation: fix storage key handling The key member of the runtime instrumentation control block contains only the access key, not the complete storage key. Therefore the value must be shifted by four bits. Note: this is only relevant for debugging purposes in case somebody compiles a kernel with a default storage access key set to a value not equal to zero. Fixes: e4b8b3f33fca ("s390: add support for runtime instrumentation") Reported-by: Claudio Imbrenda Signed-off-by: Heiko Carstens --- arch/s390/kernel/runtime_instr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 125c7f6e8715..1788a5454b6f 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -57,7 +57,7 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb) cb->k = 1; cb->ps = 1; cb->pc = 1; - cb->key = PAGE_DEFAULT_KEY; + cb->key = PAGE_DEFAULT_KEY >> 4; cb->v = 1; } From fd78c59446b8d050ecf3e0897c5a486c7de7c595 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 12 Aug 2020 18:56:28 +0200 Subject: [PATCH 092/290] s390/ptrace: fix storage key handling The key member of the runtime instrumentation control block contains only the access key, not the complete storage key. Therefore the value must be shifted by four bits. Since existing user space does not necessarily query and set the access key correctly, just ignore the user space provided key and use the correct one. Note: this is only relevant for debugging purposes in case somebody compiles a kernel with a default storage access key set to a value not equal to zero. Fixes: 262832bc5acd ("s390/ptrace: add runtime instrumention register get/set") Reported-by: Claudio Imbrenda Signed-off-by: Heiko Carstens --- arch/s390/kernel/ptrace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 11d2f7d05f91..a76dd27fb2e8 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1268,7 +1268,6 @@ static bool is_ri_cb_valid(struct runtime_instr_cb *cb) cb->pc == 1 && cb->qc == 0 && cb->reserved2 == 0 && - cb->key == PAGE_DEFAULT_KEY && cb->reserved3 == 0 && cb->reserved4 == 0 && cb->reserved5 == 0 && @@ -1330,7 +1329,11 @@ static int s390_runtime_instr_set(struct task_struct *target, kfree(data); return -EINVAL; } - + /* + * Override access key in any case, since user space should + * not be able to set it, nor should it care about it. + */ + ri_cb.key = PAGE_DEFAULT_KEY >> 4; preempt_disable(); if (!target->thread.ri_cb) target->thread.ri_cb = data; From 3cddb79afc60bcdb5fd9dd7a1c64a8d03bdd460f Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 3 Aug 2020 09:33:29 +0200 Subject: [PATCH 093/290] s390/pci: fix zpci_bus_link_virtfn() We were missing the pci_dev_put() for candidate PFs. Furhtermore in discussion with upstream it turns out that somewhat counterintuitively some common code, in particular the vfio-pci driver, assumes that pdev->is_virtfn always implies that pdev->physfn is set, i.e. that VFs are always linked. While POWER does seem to set pdev->is_virtfn even for unlinked functions (see comments in arch/powerpc/kernel/eeh.c:eeh_debugfs_break_device()) for now just be safe and only set pdev->is_virtfn on linking. Also make sure that we only search for parent PFs if the zbus is multifunction and we thus know the devfn values supplied by firmware come from the RID. Fixes: e5794cf1a270 ("s390/pci: create links between PFs and VFs") Cc: # 5.8 Reviewed-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_bus.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 642a99384688..423af8b1da7b 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -132,13 +132,14 @@ static int zpci_bus_link_virtfn(struct pci_dev *pdev, { int rc; - virtfn->physfn = pci_dev_get(pdev); rc = pci_iov_sysfs_link(pdev, virtfn, vfid); - if (rc) { - pci_dev_put(pdev); - virtfn->physfn = NULL; + if (rc) return rc; - } + + virtfn->is_virtfn = 1; + virtfn->multifunction = 0; + virtfn->physfn = pci_dev_get(pdev); + return 0; } @@ -151,9 +152,9 @@ static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ int rc = 0; - virtfn->is_virtfn = 1; - virtfn->multifunction = 0; - WARN_ON(vfid < 0); + if (!zbus->multifunction) + return 0; + /* If the parent PF for the given VF is also configured in the * instance, it must be on the same zbus. * We can then identify the parent PF by checking what @@ -165,11 +166,17 @@ static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, zdev = zbus->function[i]; if (zdev && zdev->is_physfn) { pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (!pdev) + continue; cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); if (cand_devfn == virtfn->devfn) { rc = zpci_bus_link_virtfn(pdev, virtfn, vfid); + /* balance pci_get_slot() */ + pci_dev_put(pdev); break; } + /* balance pci_get_slot() */ + pci_dev_put(pdev); } } return rc; @@ -178,8 +185,6 @@ static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) { - virtfn->is_virtfn = 1; - virtfn->multifunction = 0; return 0; } #endif From 2f0230b2f2d5fd287a85583eefb5aed35b6fe510 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 3 Aug 2020 17:46:32 +0200 Subject: [PATCH 094/290] s390/pci: re-introduce zpci_remove_device() For fixing the PF to VF link removal we need to perform some action on every removal of a zdev from the common PCI subsystem. So in preparation re-introduce zpci_remove_device() and use that instead of directly calling the common code functions. This was actually still declared from earlier code but no longer implemented. Reviewed-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci.c | 19 ++++++++++++------- arch/s390/pci/pci_event.c | 4 ++-- drivers/pci/hotplug/s390_pci_hpc.c | 12 +++++------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 3902c9f6f2d6..6e57ff885f8a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -672,6 +672,16 @@ int zpci_disable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_disable_device); +void zpci_remove_device(struct zpci_dev *zdev) +{ + struct zpci_bus *zbus = zdev->zbus; + struct pci_dev *pdev; + + pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (pdev) + pci_stop_and_remove_bus_device_locked(pdev); +} + int zpci_create_device(struct zpci_dev *zdev) { int rc; @@ -716,13 +726,8 @@ void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); - if (zdev->zbus->bus) { - struct pci_dev *pdev; - - pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); - if (pdev) - pci_stop_and_remove_bus_device_locked(pdev); - } + if (zdev->zbus->bus) + zpci_remove_device(zdev); switch (zdev->state) { case ZPCI_FN_STATE_ONLINE: diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 4602068c7548..9a3a291cad43 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -121,7 +121,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) if (!zdev) break; if (pdev) - pci_stop_and_remove_bus_device_locked(pdev); + zpci_remove_device(zdev); ret = zpci_disable_device(zdev); if (ret) @@ -140,7 +140,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) /* Give the driver a hint that the function is * already unusable. */ pdev->error_state = pci_channel_io_perm_failure; - pci_stop_and_remove_bus_device_locked(pdev); + zpci_remove_device(zdev); } zdev->state = ZPCI_FN_STATE_STANDBY; diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index b59f84918fe0..c9e790c74051 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -83,21 +83,19 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, hotplug_slot); struct pci_dev *pdev; - struct zpci_bus *zbus = zdev->zbus; int rc; if (!zpci_fn_configured(zdev->state)) return -EIO; - pdev = pci_get_slot(zbus->bus, zdev->devfn); - if (pdev) { - if (pci_num_vf(pdev)) - return -EBUSY; - - pci_stop_and_remove_bus_device_locked(pdev); + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev && pci_num_vf(pdev)) { pci_dev_put(pdev); + return -EBUSY; } + zpci_remove_device(zdev); + rc = zpci_disable_device(zdev); if (rc) return rc; From b97bf44f99155e57088e16974afb1f2d7b5287aa Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 3 Aug 2020 17:58:10 +0200 Subject: [PATCH 095/290] s390/pci: fix PF/VF linking on hot plug Currently there are four places in which a PCI function is scanned and made available to drivers: 1. In pci_scan_root_bus() as part of the initial zbus creation. 2. In zpci_bus_add_devices() when registering a device in configured state on a zbus that has already been scanned. 3. When a function is already known to zPCI (in reserved/standby state) and configuration is triggered through firmware by PEC 0x301. 4. When a device is already known to zPCI (in standby/reserved state) and configuration is triggered from within Linux using enable_slot(). The PF/VF linking step and setting of pdev->is_virtfn introduced with commit e5794cf1a270 ("s390/pci: create links between PFs and VFs") was only triggered for the second case, which is where VFs created through sriov_numvfs usually land. However unlike some other platforms but like POWER VFs can be individually enabled/disabled through /sys/bus/pci/slots. Fix this by doing VF setup as part of pcibios_bus_add_device() which is called in all of the above cases. Finally to remove the PF/VF links call the common code pci_iov_remove_virtfn() function to remove linked VFs. This takes care of the necessary sysfs cleanup. Fixes: e5794cf1a270 ("s390/pci: create links between PFs and VFs") Cc: # 5.8: 2f0230b2f2d5: s390/pci: re-introduce zpci_remove_device() Cc: # 5.8 Acked-by: Pierre Morel Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci.c | 5 ++++- arch/s390/pci/pci_bus.c | 27 +++++++++++++++------------ arch/s390/pci/pci_bus.h | 13 +++++++++++++ 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 6e57ff885f8a..4b62d6b55024 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -678,8 +678,11 @@ void zpci_remove_device(struct zpci_dev *zdev) struct pci_dev *pdev; pdev = pci_get_slot(zbus->bus, zdev->devfn); - if (pdev) + if (pdev) { + if (pdev->is_virtfn) + return zpci_remove_virtfn(pdev, zdev->vfn); pci_stop_and_remove_bus_device_locked(pdev); + } } int zpci_create_device(struct zpci_dev *zdev) diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 423af8b1da7b..5967f3014156 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -189,6 +189,19 @@ static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus, } #endif +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + + /* + * With pdev->no_vf_scan the common PCI probing code does not + * perform PF/VF linking. + */ + if (zdev->vfn) + zpci_bus_setup_virtfn(zdev->zbus, pdev, zdev->vfn); + +} + static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) { struct pci_bus *bus; @@ -219,20 +232,10 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) } pdev = pci_scan_single_device(bus, zdev->devfn); - if (pdev) { - if (!zdev->is_physfn) { - rc = zpci_bus_setup_virtfn(zbus, pdev, zdev->vfn); - if (rc) - goto failed_with_pdev; - } + if (pdev) pci_bus_add_device(pdev); - } - return 0; -failed_with_pdev: - pci_stop_and_remove_bus_device(pdev); - pci_dev_put(pdev); - return rc; + return 0; } static void zpci_bus_add_devices(struct zpci_bus *zbus) diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index 89be3c354b7b..4972433df458 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -29,3 +29,16 @@ static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus, return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn]; } + +#ifdef CONFIG_PCI_IOV +static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) +{ + + pci_lock_rescan_remove(); + /* Linux' vfid's start at 0 vfn at 1 */ + pci_iov_remove_virtfn(pdev->physfn, vfn - 1); + pci_unlock_rescan_remove(); +} +#else /* CONFIG_PCI_IOV */ +static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) {} +#endif /* CONFIG_PCI_IOV */ From 314213c15702f7598c486cf8c94f617719dfe339 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 4 Aug 2020 16:10:43 +0200 Subject: [PATCH 096/290] ASoC: wm8994: Prevent access to invalid VU register bits on WM1811 The ADC2 and DAC2 are not available on WM1811 device. This patch moves the ADC2, DAC2 VU bitfields to a separate array so we can skip accessing them and avoid unreadable register access on WM1811. This allows to get rid of warnings during boot like: wm8994-codec: ASoC: error at soc_component_read_no_lock on wm8994-codec: -5 Signed-off-by: Sylwester Nawrocki Link: https://lore.kernel.org/r/20200804141043.11425-1-s.nawrocki@samsung.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8994.c | 60 ++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index a84ae879d37e..038be667c1a6 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -43,10 +43,12 @@ #define WM8994_NUM_DRC 3 #define WM8994_NUM_EQ 3 -static struct { +struct wm8994_reg_mask { unsigned int reg; unsigned int mask; -} wm8994_vu_bits[] = { +}; + +static struct wm8994_reg_mask wm8994_vu_bits[] = { { WM8994_LEFT_LINE_INPUT_1_2_VOLUME, WM8994_IN1_VU }, { WM8994_RIGHT_LINE_INPUT_1_2_VOLUME, WM8994_IN1_VU }, { WM8994_LEFT_LINE_INPUT_3_4_VOLUME, WM8994_IN2_VU }, @@ -60,14 +62,10 @@ static struct { { WM8994_AIF1_DAC1_LEFT_VOLUME, WM8994_AIF1DAC1_VU }, { WM8994_AIF1_DAC1_RIGHT_VOLUME, WM8994_AIF1DAC1_VU }, - { WM8994_AIF1_DAC2_LEFT_VOLUME, WM8994_AIF1DAC2_VU }, - { WM8994_AIF1_DAC2_RIGHT_VOLUME, WM8994_AIF1DAC2_VU }, { WM8994_AIF2_DAC_LEFT_VOLUME, WM8994_AIF2DAC_VU }, { WM8994_AIF2_DAC_RIGHT_VOLUME, WM8994_AIF2DAC_VU }, { WM8994_AIF1_ADC1_LEFT_VOLUME, WM8994_AIF1ADC1_VU }, { WM8994_AIF1_ADC1_RIGHT_VOLUME, WM8994_AIF1ADC1_VU }, - { WM8994_AIF1_ADC2_LEFT_VOLUME, WM8994_AIF1ADC2_VU }, - { WM8994_AIF1_ADC2_RIGHT_VOLUME, WM8994_AIF1ADC2_VU }, { WM8994_AIF2_ADC_LEFT_VOLUME, WM8994_AIF2ADC_VU }, { WM8994_AIF2_ADC_RIGHT_VOLUME, WM8994_AIF1ADC2_VU }, { WM8994_DAC1_LEFT_VOLUME, WM8994_DAC1_VU }, @@ -76,6 +74,14 @@ static struct { { WM8994_DAC2_RIGHT_VOLUME, WM8994_DAC2_VU }, }; +/* VU bitfields for ADC2, DAC2 not available on WM1811 */ +static struct wm8994_reg_mask wm8994_adc2_dac2_vu_bits[] = { + { WM8994_AIF1_DAC2_LEFT_VOLUME, WM8994_AIF1DAC2_VU }, + { WM8994_AIF1_DAC2_RIGHT_VOLUME, WM8994_AIF1DAC2_VU }, + { WM8994_AIF1_ADC2_LEFT_VOLUME, WM8994_AIF1ADC2_VU }, + { WM8994_AIF1_ADC2_RIGHT_VOLUME, WM8994_AIF1ADC2_VU }, +}; + static int wm8994_drc_base[] = { WM8994_AIF1_DRC1_1, WM8994_AIF1_DRC2_1, @@ -1030,6 +1036,26 @@ static bool wm8994_check_class_w_digital(struct snd_soc_component *component) return true; } +static void wm8994_update_vu_bits(struct snd_soc_component *component) +{ + struct wm8994_priv *wm8994 = snd_soc_component_get_drvdata(component); + struct wm8994 *control = wm8994->wm8994; + int i; + + for (i = 0; i < ARRAY_SIZE(wm8994_vu_bits); i++) + snd_soc_component_write(component, wm8994_vu_bits[i].reg, + snd_soc_component_read(component, + wm8994_vu_bits[i].reg)); + if (control->type == WM1811) + return; + + for (i = 0; i < ARRAY_SIZE(wm8994_adc2_dac2_vu_bits); i++) + snd_soc_component_write(component, + wm8994_adc2_dac2_vu_bits[i].reg, + snd_soc_component_read(component, + wm8994_adc2_dac2_vu_bits[i].reg)); +} + static int aif_mclk_set(struct snd_soc_component *component, int aif, bool enable) { struct wm8994_priv *wm8994 = snd_soc_component_get_drvdata(component); @@ -1076,7 +1102,7 @@ static int aif1clk_ev(struct snd_soc_dapm_widget *w, struct wm8994_priv *wm8994 = snd_soc_component_get_drvdata(component); struct wm8994 *control = wm8994->wm8994; int mask = WM8994_AIF1DAC1L_ENA | WM8994_AIF1DAC1R_ENA; - int ret, i; + int ret; int dac; int adc; int val; @@ -1144,10 +1170,7 @@ static int aif1clk_ev(struct snd_soc_dapm_widget *w, break; case SND_SOC_DAPM_POST_PMU: - for (i = 0; i < ARRAY_SIZE(wm8994_vu_bits); i++) - snd_soc_component_write(component, wm8994_vu_bits[i].reg, - snd_soc_component_read(component, - wm8994_vu_bits[i].reg)); + wm8994_update_vu_bits(component); break; case SND_SOC_DAPM_PRE_PMD: @@ -1181,7 +1204,7 @@ static int aif2clk_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - int ret, i; + int ret; int dac; int adc; int val; @@ -1237,10 +1260,7 @@ static int aif2clk_ev(struct snd_soc_dapm_widget *w, break; case SND_SOC_DAPM_POST_PMU: - for (i = 0; i < ARRAY_SIZE(wm8994_vu_bits); i++) - snd_soc_component_write(component, wm8994_vu_bits[i].reg, - snd_soc_component_read(component, - wm8994_vu_bits[i].reg)); + wm8994_update_vu_bits(component); break; case SND_SOC_DAPM_PRE_PMD: @@ -4346,6 +4366,14 @@ static int wm8994_component_probe(struct snd_soc_component *component) wm8994_vu_bits[i].mask, wm8994_vu_bits[i].mask); + if (control->type != WM1811) { + for (i = 0; i < ARRAY_SIZE(wm8994_adc2_dac2_vu_bits); i++) + snd_soc_component_update_bits(component, + wm8994_adc2_dac2_vu_bits[i].reg, + wm8994_adc2_dac2_vu_bits[i].mask, + wm8994_adc2_dac2_vu_bits[i].mask); + } + /* Set the low bit of the 3D stereo depth so TLV matches */ snd_soc_component_update_bits(component, WM8994_AIF1_DAC1_FILTERS_2, 1 << WM8994_AIF1DAC1_3D_GAIN_SHIFT, From ff69c97ef84c9f7795adb49e9f07c9adcdd0c288 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 11 Aug 2020 11:34:52 +0100 Subject: [PATCH 097/290] ASoC: msm8916-wcd-analog: fix register Interrupt offset For some reason interrupt set and clear register offsets are not set correctly. This patch corrects them! Fixes: 585e881e5b9e ("ASoC: codecs: Add msm8916-wcd analog codec") Signed-off-by: Srinivas Kandagatla Tested-by: Stephan Gerhold Reviewed-by: Stephan Gerhold Link: https://lore.kernel.org/r/20200811103452.20448-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/msm8916-wcd-analog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 4428c62e25cf..3ddd822240e3 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -19,8 +19,8 @@ #define CDC_D_REVISION1 (0xf000) #define CDC_D_PERPH_SUBTYPE (0xf005) -#define CDC_D_INT_EN_SET (0x015) -#define CDC_D_INT_EN_CLR (0x016) +#define CDC_D_INT_EN_SET (0xf015) +#define CDC_D_INT_EN_CLR (0xf016) #define MBHC_SWITCH_INT BIT(7) #define MBHC_MIC_ELECTRICAL_INS_REM_DET BIT(6) #define MBHC_BUTTON_PRESS_DET BIT(5) From f082bb59b72039a2326ec1a44496899fb8aa6d0e Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 31 Jul 2020 19:38:34 +0200 Subject: [PATCH 098/290] ASoC: wm8994: Avoid attempts to read unreadable registers The driver supports WM1811, WM8994, WM8958 devices but according to documentation and the regmap definitions the WM8958_DSP2_* registers are only available on WM8958. In current code these registers are being accessed as if they were available on all the three chips. When starting playback on WM1811 CODEC multiple errors like: "wm8994-codec wm8994-codec: ASoC: error at soc_component_read_no_lock on wm8994-codec: -5" can be seen, which is caused by attempts to read an unavailable WM8958_DSP2_PROGRAM register. The issue has been uncovered by recent commit "e2329ee ASoC: soc-component: add soc_component_err()". This patch adds a check in wm8958_aif_ev() callback so the DSP2 handling is only done for WM8958. Signed-off-by: Sylwester Nawrocki Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20200731173834.23832-1-s.nawrocki@samsung.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8958-dsp2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index 68a3b48e6b31..3bce9a14f0f3 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -412,8 +412,12 @@ int wm8958_aif_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct wm8994 *control = dev_get_drvdata(component->dev->parent); int i; + if (control->type != WM8958) + return 0; + switch (event) { case SND_SOC_DAPM_POST_PMU: case SND_SOC_DAPM_PRE_PMU: From 062fa09f44f4fb3776a23184d5d296b0c8872eb9 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 13 Aug 2020 16:41:10 +0800 Subject: [PATCH 099/290] ASoC: intel: Fix memleak in sst_media_open When power_up_sst() fails, stream needs to be freed just like when try_module_get() fails. However, current code is returning directly and ends up leaking memory. Fixes: 0121327c1a68b ("ASoC: Intel: mfld-pcm: add control for powering up/down dsp") Signed-off-by: Dinghao Liu Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200813084112.26205-1-dinghao.liu@zju.edu.cn Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 49b9f18472bc..b1cac7abdc0a 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -331,7 +331,7 @@ static int sst_media_open(struct snd_pcm_substream *substream, ret_val = power_up_sst(stream); if (ret_val < 0) - return ret_val; + goto out_power_up; /* Make sure, that the period size is always even */ snd_pcm_hw_constraint_step(substream->runtime, 0, @@ -340,8 +340,9 @@ static int sst_media_open(struct snd_pcm_substream *substream, return snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); out_ops: - kfree(stream); mutex_unlock(&sst_lock); +out_power_up: + kfree(stream); return ret_val; } From 1e4e4bcaf70ec89f8b499c93a83d078c1e5c0ea6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 17 Aug 2020 16:03:01 +0530 Subject: [PATCH 100/290] powerpc/pkeys: Fix build error with PPC_MEM_KEYS disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IS_ENABLED() instead of #ifdef still requires variable declaration. In this specific case, default_uamor is declared in asm/pkeys.h which is only included if PPC_MEM_KEYS is enabled. arch/powerpc/mm/book3s64/hash_utils.c: In function ‘hash__early_init_mmu_secondary’: arch/powerpc/mm/book3s64/hash_utils.c:1119:21: error: ‘default_uamor’ undeclared (first use in this function) 1119 | mtspr(SPRN_UAMOR, default_uamor); | ^~~~~~~~~~~~~ Fixes: 6553fb799f60 ("powerpc/pkeys: Fix boot failures with Nemo board (A-EON AmigaOne X1000)") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200817103301.158836-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 1da9dbba9217..890a71c5293e 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1115,8 +1115,10 @@ void hash__early_init_mmu_secondary(void) && cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); - if (IS_ENABLED(CONFIG_PPC_MEM_KEYS) && mmu_has_feature(MMU_FTR_PKEY)) +#ifdef CONFIG_PPC_MEM_KEYS + if (mmu_has_feature(MMU_FTR_PKEY)) mtspr(SPRN_UAMOR, default_uamor); +#endif } #endif /* CONFIG_SMP */ From fdc6edbb31fba76fd25d7bd016b675a92908d81e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 17 Aug 2020 06:03:26 +0000 Subject: [PATCH 101/290] powerpc/fixmap: Fix the size of the early debug area Commit ("03fd42d458fb powerpc/fixmap: Fix FIX_EARLY_DEBUG_BASE when page size is 256k") reworked the setup of the early debug area and mistakenly replaced 128 * 1024 by SZ_128. Change to SZ_128K to restore the original 128 kbytes size of the area. Fixes: 03fd42d458fb ("powerpc/fixmap: Fix FIX_EARLY_DEBUG_BASE when page size is 256k") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/996184974d674ff984643778cf1cdd7fe58cc065.1597644194.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/fixmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 925cf89cbf4b..6bfc87915d5d 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -52,7 +52,7 @@ enum fixed_addresses { FIX_HOLE, /* reserve the top 128K for early debugging purposes */ FIX_EARLY_DEBUG_TOP = FIX_HOLE, - FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128, PAGE_SIZE)/PAGE_SIZE)-1, + FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, From a97ea93ed5b64704a2171c505355c12ab427b1b1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 21 Jul 2020 21:57:43 -0700 Subject: [PATCH 102/290] Makefile.extrawarn: Move sign-compare from W=2 to W=3 This -Wsign-compare compiler warning can be very noisy and most of the suggested conversions are unnecessary. Make the warning W=3 so it's described under the "can most likely be ignored" block. Signed-off-by: Joe Perches Signed-off-by: Masahiro Yamada --- scripts/Makefile.extrawarn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 62c275685b75..95e4cdb94fe9 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -66,7 +66,6 @@ KBUILD_CFLAGS += -Wnested-externs KBUILD_CFLAGS += -Wshadow KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) KBUILD_CFLAGS += -Wmissing-field-initializers -KBUILD_CFLAGS += -Wsign-compare KBUILD_CFLAGS += -Wtype-limits KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) @@ -87,6 +86,7 @@ KBUILD_CFLAGS += -Wpacked KBUILD_CFLAGS += -Wpadded KBUILD_CFLAGS += -Wpointer-arith KBUILD_CFLAGS += -Wredundant-decls +KBUILD_CFLAGS += -Wsign-compare KBUILD_CFLAGS += -Wswitch-default KBUILD_CFLAGS += $(call cc-option, -Wpacked-bitfield-compat) From bc93b9ae0151ae5ad5b8504cdc598428ea99570b Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 17 Aug 2020 11:08:18 -0600 Subject: [PATCH 103/290] vfio-pci: Avoid recursive read-lock usage A down_read on memory_lock is held when performing read/write accesses to MMIO BAR space, including across the copy_to/from_user() callouts which may fault. If the user buffer for these copies resides in an mmap of device MMIO space, the mmap fault handler will acquire a recursive read-lock on memory_lock. Avoid this by reducing the lock granularity. Sequential accesses requiring multiple ioread/iowrite cycles are expected to be rare, therefore typical accesses should not see additional overhead. VGA MMIO accesses are expected to be non-fatal regardless of the PCI memory enable bit to allow legacy probing, this behavior remains with a comment added. ioeventfds are now included in memory access testing, with writes dropped while memory space is disabled. Fixes: abafbc551fdd ("vfio-pci: Invalidate mmaps and block MMIO access on disabled memory") Reported-by: Zhiyi Guo Tested-by: Zhiyi Guo Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_private.h | 2 + drivers/vfio/pci/vfio_pci_rdwr.c | 120 ++++++++++++++++++++++------ 2 files changed, 98 insertions(+), 24 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 86a02aff8735..61ca8ab165dc 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -33,12 +33,14 @@ struct vfio_pci_ioeventfd { struct list_head next; + struct vfio_pci_device *vdev; struct virqfd *virqfd; void __iomem *addr; uint64_t data; loff_t pos; int bar; int count; + bool test_mem; }; struct vfio_pci_irq_ctx { diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 916b184df3a5..9e353c484ace 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -37,17 +37,70 @@ #define vfio_ioread8 ioread8 #define vfio_iowrite8 iowrite8 +#define VFIO_IOWRITE(size) \ +static int vfio_pci_iowrite##size(struct vfio_pci_device *vdev, \ + bool test_mem, u##size val, void __iomem *io) \ +{ \ + if (test_mem) { \ + down_read(&vdev->memory_lock); \ + if (!__vfio_pci_memory_enabled(vdev)) { \ + up_read(&vdev->memory_lock); \ + return -EIO; \ + } \ + } \ + \ + vfio_iowrite##size(val, io); \ + \ + if (test_mem) \ + up_read(&vdev->memory_lock); \ + \ + return 0; \ +} + +VFIO_IOWRITE(8) +VFIO_IOWRITE(16) +VFIO_IOWRITE(32) +#ifdef iowrite64 +VFIO_IOWRITE(64) +#endif + +#define VFIO_IOREAD(size) \ +static int vfio_pci_ioread##size(struct vfio_pci_device *vdev, \ + bool test_mem, u##size *val, void __iomem *io) \ +{ \ + if (test_mem) { \ + down_read(&vdev->memory_lock); \ + if (!__vfio_pci_memory_enabled(vdev)) { \ + up_read(&vdev->memory_lock); \ + return -EIO; \ + } \ + } \ + \ + *val = vfio_ioread##size(io); \ + \ + if (test_mem) \ + up_read(&vdev->memory_lock); \ + \ + return 0; \ +} + +VFIO_IOREAD(8) +VFIO_IOREAD(16) +VFIO_IOREAD(32) + /* * Read or write from an __iomem region (MMIO or I/O port) with an excluded * range which is inaccessible. The excluded range drops writes and fills * reads with -1. This is intended for handling MSI-X vector tables and * leftover space for ROM BARs. */ -static ssize_t do_io_rw(void __iomem *io, char __user *buf, +static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem, + void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start, size_t x_end, bool iswrite) { ssize_t done = 0; + int ret; while (count) { size_t fillable, filled; @@ -66,9 +119,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf, if (copy_from_user(&val, buf, 4)) return -EFAULT; - vfio_iowrite32(val, io + off); + ret = vfio_pci_iowrite32(vdev, test_mem, + val, io + off); + if (ret) + return ret; } else { - val = vfio_ioread32(io + off); + ret = vfio_pci_ioread32(vdev, test_mem, + &val, io + off); + if (ret) + return ret; if (copy_to_user(buf, &val, 4)) return -EFAULT; @@ -82,9 +141,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf, if (copy_from_user(&val, buf, 2)) return -EFAULT; - vfio_iowrite16(val, io + off); + ret = vfio_pci_iowrite16(vdev, test_mem, + val, io + off); + if (ret) + return ret; } else { - val = vfio_ioread16(io + off); + ret = vfio_pci_ioread16(vdev, test_mem, + &val, io + off); + if (ret) + return ret; if (copy_to_user(buf, &val, 2)) return -EFAULT; @@ -98,9 +163,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf, if (copy_from_user(&val, buf, 1)) return -EFAULT; - vfio_iowrite8(val, io + off); + ret = vfio_pci_iowrite8(vdev, test_mem, + val, io + off); + if (ret) + return ret; } else { - val = vfio_ioread8(io + off); + ret = vfio_pci_ioread8(vdev, test_mem, + &val, io + off); + if (ret) + return ret; if (copy_to_user(buf, &val, 1)) return -EFAULT; @@ -178,14 +249,6 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, count = min(count, (size_t)(end - pos)); - if (res->flags & IORESOURCE_MEM) { - down_read(&vdev->memory_lock); - if (!__vfio_pci_memory_enabled(vdev)) { - up_read(&vdev->memory_lock); - return -EIO; - } - } - if (bar == PCI_ROM_RESOURCE) { /* * The ROM can fill less space than the BAR, so we start the @@ -213,7 +276,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, x_end = vdev->msix_offset + vdev->msix_size; } - done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite); + done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos, + count, x_start, x_end, iswrite); if (done >= 0) *ppos += done; @@ -221,9 +285,6 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, if (bar == PCI_ROM_RESOURCE) pci_unmap_rom(pdev, io); out: - if (res->flags & IORESOURCE_MEM) - up_read(&vdev->memory_lock); - return done; } @@ -278,7 +339,12 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, return ret; } - done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite); + /* + * VGA MMIO is a legacy, non-BAR resource that hopefully allows + * probing, so we don't currently worry about access in relation + * to the memory enable bit in the command register. + */ + done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite); vga_put(vdev->pdev, rsrc); @@ -296,17 +362,21 @@ static int vfio_pci_ioeventfd_handler(void *opaque, void *unused) switch (ioeventfd->count) { case 1: - vfio_iowrite8(ioeventfd->data, ioeventfd->addr); + vfio_pci_iowrite8(ioeventfd->vdev, ioeventfd->test_mem, + ioeventfd->data, ioeventfd->addr); break; case 2: - vfio_iowrite16(ioeventfd->data, ioeventfd->addr); + vfio_pci_iowrite16(ioeventfd->vdev, ioeventfd->test_mem, + ioeventfd->data, ioeventfd->addr); break; case 4: - vfio_iowrite32(ioeventfd->data, ioeventfd->addr); + vfio_pci_iowrite32(ioeventfd->vdev, ioeventfd->test_mem, + ioeventfd->data, ioeventfd->addr); break; #ifdef iowrite64 case 8: - vfio_iowrite64(ioeventfd->data, ioeventfd->addr); + vfio_pci_iowrite64(ioeventfd->vdev, ioeventfd->test_mem, + ioeventfd->data, ioeventfd->addr); break; #endif } @@ -378,11 +448,13 @@ long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset, goto out_unlock; } + ioeventfd->vdev = vdev; ioeventfd->addr = vdev->barmap[bar] + pos; ioeventfd->data = data; ioeventfd->pos = pos; ioeventfd->bar = bar; ioeventfd->count = count; + ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM; ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler, NULL, NULL, &ioeventfd->virqfd, fd); From aae7a75a821a793ed6b8ad502a5890fb8e8f172d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 17 Aug 2020 11:09:13 -0600 Subject: [PATCH 104/290] vfio/type1: Add proper error unwind for vfio_iommu_replay() The vfio_iommu_replay() function does not currently unwind on error, yet it does pin pages, perform IOMMU mapping, and modify the vfio_dma structure to indicate IOMMU mapping. The IOMMU mappings are torn down when the domain is destroyed, but the other actions go on to cause trouble later. For example, the iommu->domain_list can be empty if we only have a non-IOMMU backed mdev attached. We don't currently check if the list is empty before getting the first entry in the list, which leads to a bogus domain pointer. If a vfio_dma entry is erroneously marked as iommu_mapped, we'll attempt to use that bogus pointer to retrieve the existing physical page addresses. This is the scenario that uncovered this issue, attempting to hot-add a vfio-pci device to a container with an existing mdev device and DMA mappings, one of which could not be pinned, causing a failure adding the new group to the existing container and setting the conditions for a subsequent attempt to explode. To resolve this, we can first check if the domain_list is empty so that we can reject replay of a bogus domain, should we ever encounter this inconsistent state again in the future. The real fix though is to add the necessary unwind support, which means cleaning up the current pinning if an IOMMU mapping fails, then walking back through the r-b tree of DMA entries, reading from the IOMMU which ranges are mapped, and unmapping and unpinning those ranges. To be able to do this, we also defer marking the DMA entry as IOMMU mapped until all entries are processed, in order to allow the unwind to know the disposition of each entry. Fixes: a54eb55045ae ("vfio iommu type1: Add support for mediated devices") Reported-by: Zhiyi Guo Tested-by: Zhiyi Guo Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 71 ++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 6990fc711a80..c992973cc2d5 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1424,13 +1424,16 @@ static int vfio_bus_type(struct device *dev, void *data) static int vfio_iommu_replay(struct vfio_iommu *iommu, struct vfio_domain *domain) { - struct vfio_domain *d; + struct vfio_domain *d = NULL; struct rb_node *n; unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; int ret; /* Arbitrarily pick the first domain in the list for lookups */ - d = list_first_entry(&iommu->domain_list, struct vfio_domain, next); + if (!list_empty(&iommu->domain_list)) + d = list_first_entry(&iommu->domain_list, + struct vfio_domain, next); + n = rb_first(&iommu->dma_list); for (; n; n = rb_next(n)) { @@ -1448,6 +1451,11 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, phys_addr_t p; dma_addr_t i; + if (WARN_ON(!d)) { /* mapped w/o a domain?! */ + ret = -EINVAL; + goto unwind; + } + phys = iommu_iova_to_phys(d->domain, iova); if (WARN_ON(!phys)) { @@ -1477,7 +1485,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; - return ret; + goto unwind; } phys = pfn << PAGE_SHIFT; @@ -1486,14 +1494,67 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, ret = iommu_map(domain->domain, iova, phys, size, dma->prot | domain->prot); - if (ret) - return ret; + if (ret) { + if (!dma->iommu_mapped) + vfio_unpin_pages_remote(dma, iova, + phys >> PAGE_SHIFT, + size >> PAGE_SHIFT, + true); + goto unwind; + } iova += size; } + } + + /* All dmas are now mapped, defer to second tree walk for unwind */ + for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { + struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); + dma->iommu_mapped = true; } + return 0; + +unwind: + for (; n; n = rb_prev(n)) { + struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); + dma_addr_t iova; + + if (dma->iommu_mapped) { + iommu_unmap(domain->domain, dma->iova, dma->size); + continue; + } + + iova = dma->iova; + while (iova < dma->iova + dma->size) { + phys_addr_t phys, p; + size_t size; + dma_addr_t i; + + phys = iommu_iova_to_phys(domain->domain, iova); + if (!phys) { + iova += PAGE_SIZE; + continue; + } + + size = PAGE_SIZE; + p = phys + size; + i = iova + size; + while (i < dma->iova + dma->size && + p == iommu_iova_to_phys(domain->domain, i)) { + size += PAGE_SIZE; + p += PAGE_SIZE; + i += PAGE_SIZE; + } + + iommu_unmap(domain->domain, iova, size); + vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT, + size >> PAGE_SHIFT, true); + } + } + + return ret; } /* From 98b0bf02738004829d7e26d6cb47b2e469aaba86 Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Fri, 14 Aug 2020 21:21:05 +0800 Subject: [PATCH 105/290] selftests: kvm: Use a shorter encoding to clear RAX If debug_regs.c is built with newer binutils, the resulting binary is "optimized" by the assembler: asm volatile("ss_start: " "xor %%rax,%%rax\n\t" "cpuid\n\t" "movl $0x1a0,%%ecx\n\t" "rdmsr\n\t" : : : "rax", "ecx"); is translated to : 000000000040194e : 40194e: 31 c0 xor %eax,%eax <----- rax->eax? 401950: 0f a2 cpuid 401952: b9 a0 01 00 00 mov $0x1a0,%ecx 401957: 0f 32 rdmsr As you can see rax is replaced with eax in target binary code. This causes a difference is the length of xor instruction (2 Byte vs 3 Byte), and makes the hard-coded instruction length check fail: /* Instruction lengths starting at ss_start */ int ss_size[4] = { 3, /* xor */ <-------- 2 or 3? 2, /* cpuid */ 5, /* mov */ 2, /* rdmsr */ }; Encode the shorter version directly and, while at it, fix the "clobbers" of the asm. Cc: stable@vger.kernel.org Signed-off-by: Yang Weijiang Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/debug_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index 8162c58a1234..b8d14f9db5f9 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -40,11 +40,11 @@ static void guest_code(void) /* Single step test, covers 2 basic instructions and 2 emulated */ asm volatile("ss_start: " - "xor %%rax,%%rax\n\t" + "xor %%eax,%%eax\n\t" "cpuid\n\t" "movl $0x1a0,%%ecx\n\t" "rdmsr\n\t" - : : : "rax", "ecx"); + : : : "eax", "ebx", "ecx", "edx"); /* DR6.BD test */ asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax"); From d8d0db7bb358ef65d60726a61bfcd08eccff0bc0 Mon Sep 17 00:00:00 2001 From: Tom Yan Date: Tue, 18 Aug 2020 01:20:11 +0800 Subject: [PATCH 106/290] ALSA: usb-audio: ignore broken processing/extension unit Some devices have broken extension unit where getting current value doesn't work. Attempt that once when creating mixer control for it. If it fails, just ignore it, so that it won't cripple the device entirely (and/or make the error floods). Signed-off-by: Tom Yan Link: https://lore.kernel.org/r/5f3abc52.1c69fb81.9cf2.fe91@mx.google.com Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 6b0f3a8469ef..81e987eaf063 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2371,7 +2371,7 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, int num_ins; struct usb_mixer_elem_info *cval; struct snd_kcontrol *kctl; - int i, err, nameid, type, len; + int i, err, nameid, type, len, val; const struct procunit_info *info; const struct procunit_value_info *valinfo; const struct usbmix_name_map *map; @@ -2474,6 +2474,12 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, break; } + err = get_cur_ctl_value(cval, cval->control << 8, &val); + if (err < 0) { + usb_mixer_elem_info_free(cval); + return -EINVAL; + } + kctl = snd_ctl_new1(&mixer_procunit_ctl, cval); if (!kctl) { usb_mixer_elem_info_free(cval); From bd05220c7be3356046861c317d9c287ca50445ba Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Tue, 11 Aug 2020 19:24:57 +0100 Subject: [PATCH 107/290] arch/ia64: Restore arch-specific pgd_offset_k implementation IA-64 is special and treats pgd_offset_k() differently to pgd_offset(), using different formulae to calculate the indices into the kernel and user PGDs. The index into the user PGDs takes into account the region number, but the index into the kernel (init_mm) PGD always assumes a predefined kernel region number. Commit 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") made IA-64 use a generic pgd_offset_k() which incorrectly used pgd_index() for kernel page tables. As a result, the index into the kernel PGD was going out of bounds and the kernel hung during early boot. Allow overrides of pgd_offset_k() and override it on IA-64 with the old implementation that will correctly index the kernel PGD. Fixes: 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") Reported-by: John Paul Adrian Glaubitz Signed-off-by: Jessica Clarke Tested-by: John Paul Adrian Glaubitz Acked-by: Tony Luck Signed-off-by: Mike Rapoport --- arch/ia64/include/asm/pgtable.h | 9 +++++++++ include/linux/pgtable.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 10850897a91c..779b6972aa84 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -366,6 +366,15 @@ pgd_index (unsigned long address) } #define pgd_index pgd_index +/* + * In the kernel's mapped region we know everything is in region number 5, so + * as an optimisation its PGD already points to the area for that region. + * However, this also means that we cannot use pgd_index() and we must + * never add the region here. + */ +#define pgd_offset_k(addr) \ + (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) + /* Look up a pgd entry in the gate area. On IA-64, the gate-area resides in the kernel-mapped segment, hence we use pgd_offset_k() here. */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a124c21e3204..e8cbc2e795d5 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -117,7 +117,9 @@ static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address) * a shortcut which implies the use of the kernel's pgd, instead * of a process's */ +#ifndef pgd_offset_k #define pgd_offset_k(address) pgd_offset(&init_mm, (address)) +#endif /* * In many cases it is known that a virtual address is mapped at PMD or PTE From 19cf4b7eefc3040192bccb10c322e4c831bb0eb0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 17 Aug 2020 13:53:04 -0400 Subject: [PATCH 108/290] KVM: x86: fix access code passed to gva_to_gpa The PK bit of the error code is computed dynamically in permission_fault and therefore need not be passed to gva_to_gpa: only the access bits (fetch, user, write) need to be passed down. Not doing so causes a splat in the pku test: WARNING: CPU: 25 PID: 5465 at arch/x86/kvm/mmu.h:197 paging64_walk_addr_generic+0x594/0x750 [kvm] Hardware name: Intel Corporation WilsonCity/WilsonCity, BIOS WLYDCRB1.SYS.0014.D62.2001092233 01/09/2020 RIP: 0010:paging64_walk_addr_generic+0x594/0x750 [kvm] Code: <0f> 0b e9 db fe ff ff 44 8b 43 04 4c 89 6c 24 30 8b 13 41 39 d0 89 RSP: 0018:ff53778fc623fb60 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ff53778fc623fbf0 RCX: 0000000000000007 RDX: 0000000000000001 RSI: 0000000000000002 RDI: ff4501efba818000 RBP: 0000000000000020 R08: 0000000000000005 R09: 00000000004000e7 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000007 R13: ff4501efba818388 R14: 10000000004000e7 R15: 0000000000000000 FS: 00007f2dcf31a700(0000) GS:ff4501f1c8040000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000001dea475005 CR4: 0000000000763ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: paging64_gva_to_gpa+0x3f/0xb0 [kvm] kvm_fixup_and_inject_pf_error+0x48/0xa0 [kvm] handle_exception_nmi+0x4fc/0x5b0 [kvm_intel] kvm_arch_vcpu_ioctl_run+0x911/0x1c10 [kvm] kvm_vcpu_ioctl+0x23e/0x5d0 [kvm] ksys_ioctl+0x92/0xb0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x3e/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace d17eb998aee991da ]--- Reported-by: Sean Christopherson Fixes: 897861479c064 ("KVM: x86: Add helper functions for illegal GPA checking and page fault injection") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2db369a64f29..fcfd79a02e85 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10743,9 +10743,11 @@ EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value); void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code) { struct x86_exception fault; + u32 access = error_code & + (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK); if (!(error_code & PFERR_PRESENT_MASK) || - vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, error_code, &fault) != UNMAPPED_GVA) { + vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) { /* * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page * tables probably do not match the TLB. Just proceed From 427890aff8558eb4326e723835e0eae0e6fe3102 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 17 Aug 2020 11:16:55 -0700 Subject: [PATCH 109/290] kvm: x86: Toggling CR4.SMAP does not load PDPTEs in PAE mode See the SDM, volume 3, section 4.4.1: If PAE paging would be in use following an execution of MOV to CR0 or MOV to CR4 (see Section 4.1.1) and the instruction is modifying any of CR0.CD, CR0.NW, CR0.PG, CR4.PAE, CR4.PGE, CR4.PSE, or CR4.SMEP; then the PDPTEs are loaded from the address in CR3. Fixes: 0be0226f07d14 ("KVM: MMU: fix SMAP virtualization") Cc: Xiao Guangrong Signed-off-by: Jim Mattson Reviewed-by: Peter Shier Reviewed-by: Oliver Upton Message-Id: <20200817181655.3716509-2-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fcfd79a02e85..ecc3470b279e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,7 +975,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; + X86_CR4_SMEP | X86_CR4_PKE; if (kvm_valid_cr4(vcpu, cr4)) return 1; From cb957adb4ea422bd758568df5b2478ea3bb34f35 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 17 Aug 2020 11:16:54 -0700 Subject: [PATCH 110/290] kvm: x86: Toggling CR4.PKE does not load PDPTEs in PAE mode See the SDM, volume 3, section 4.4.1: If PAE paging would be in use following an execution of MOV to CR0 or MOV to CR4 (see Section 4.1.1) and the instruction is modifying any of CR0.CD, CR0.NW, CR0.PG, CR4.PAE, CR4.PGE, CR4.PSE, or CR4.SMEP; then the PDPTEs are loaded from the address in CR3. Fixes: b9baba8614890 ("KVM, pkeys: expose CPUID/CR4 to guest") Cc: Huaitong Han Signed-off-by: Jim Mattson Reviewed-by: Peter Shier Reviewed-by: Oliver Upton Message-Id: <20200817181655.3716509-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ecc3470b279e..539ea1cd6020 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,7 +975,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | - X86_CR4_SMEP | X86_CR4_PKE; + X86_CR4_SMEP; if (kvm_valid_cr4(vcpu, cr4)) return 1; From f49c7faf776f16607c948d852a03b04a88c3b583 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 17 Aug 2020 12:32:08 +0100 Subject: [PATCH 111/290] of/address: check for invalid range.cpu_addr Currently invalid CPU addresses are not being sanity checked resulting in SATA setup failure on a SynQuacer SC2A11 development machine. The original check was removed by and earlier commit, so add a sanity check back in to avoid this regression. Fixes: 7a8b64d17e35 ("of/address: use range parser for of_dma_get_range") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20200817113208.523805-1-colin.king@canonical.com Signed-off-by: Rob Herring --- drivers/of/address.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/of/address.c b/drivers/of/address.c index 590493e04b01..945b3d785f44 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -985,6 +985,11 @@ int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *siz /* Don't error out as we'd break some existing DTs */ continue; } + if (range.cpu_addr == OF_BAD_ADDR) { + pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n", + range.bus_addr, node); + continue; + } dma_offset = range.cpu_addr - range.bus_addr; /* Take lower and upper limits */ From cf28f3bbfca097d956f9021cb710dfad56adcc62 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 17 Aug 2020 10:42:14 -0700 Subject: [PATCH 112/290] bpf: Use get_file_rcu() instead of get_file() for task_file iterator With latest `bpftool prog` command, we observed the following kernel panic. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page PGD dfe894067 P4D dfe894067 PUD deb663067 PMD 0 Oops: 0010 [#1] SMP CPU: 9 PID: 6023 ... RIP: 0010:0x0 Code: Bad RIP value. RSP: 0000:ffffc900002b8f18 EFLAGS: 00010286 RAX: ffff8883a405f400 RBX: ffff888e46a6bf00 RCX: 000000008020000c RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8883a405f400 RBP: ffff888e46a6bf50 R08: 0000000000000000 R09: ffffffff81129600 R10: ffff8883a405f300 R11: 0000160000000000 R12: 0000000000002710 R13: 000000e9494b690c R14: 0000000000000202 R15: 0000000000000009 FS: 00007fd9187fe700(0000) GS:ffff888e46a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 0000000de5d33002 CR4: 0000000000360ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rcu_core+0x1a4/0x440 __do_softirq+0xd3/0x2c8 irq_exit+0x9d/0xa0 smp_apic_timer_interrupt+0x68/0x120 apic_timer_interrupt+0xf/0x20 RIP: 0033:0x47ce80 Code: Bad RIP value. RSP: 002b:00007fd9187fba40 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff13 RAX: 0000000000000002 RBX: 00007fd931789160 RCX: 000000000000010c RDX: 00007fd9308cdfb4 RSI: 00007fd9308cdfb4 RDI: 00007ffedd1ea0a8 RBP: 00007fd9187fbab0 R08: 000000000000000e R09: 000000000000002a R10: 0000000000480210 R11: 00007fd9187fc570 R12: 00007fd9316cc400 R13: 0000000000000118 R14: 00007fd9308cdfb4 R15: 00007fd9317a9380 After further analysis, the bug is triggered by Commit eaaacd23910f ("bpf: Add task and task/file iterator targets") which introduced task_file bpf iterator, which traverses all open file descriptors for all tasks in the current namespace. The latest `bpftool prog` calls a task_file bpf program to traverse all files in the system in order to associate processes with progs/maps, etc. When traversing files for a given task, rcu read_lock is taken to access all files in a file_struct. But it used get_file() to grab a file, which is not right. It is possible file->f_count is 0 and get_file() will unconditionally increase it. Later put_file() may cause all kind of issues with the above as one of sympotoms. The failure can be reproduced with the following steps in a few seconds: $ cat t.c #include #include #include #include #include #define N 10000 int fd[N]; int main() { int i; for (i = 0; i < N; i++) { fd[i] = open("./note.txt", 'r'); if (fd[i] < 0) { fprintf(stderr, "failed\n"); return -1; } } for (i = 0; i < N; i++) close(fd[i]); return 0; } $ gcc -O2 t.c $ cat run.sh #/bin/bash for i in {1..100} do while true; do ./a.out; done & done $ ./run.sh $ while true; do bpftool prog >& /dev/null; done This patch used get_file_rcu() which only grabs a file if the file->f_count is not zero. This is to ensure the file pointer is always valid. The above reproducer did not fail for more than 30 minutes. Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets") Suggested-by: Josef Bacik Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Reviewed-by: Josef Bacik Link: https://lore.kernel.org/bpf/20200817174214.252601-1-yhs@fb.com --- kernel/bpf/task_iter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 232df29793e9..f21b5e1e4540 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -178,10 +178,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, f = fcheck_files(curr_files, curr_fd); if (!f) continue; + if (!get_file_rcu(f)) + continue; /* set info->fd */ info->fd = curr_fd; - get_file(f); rcu_read_unlock(); return f; } From 7c2308f79fc81ba0bf24ccd2429fb483a91bcd51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2020 14:48:43 +0200 Subject: [PATCH 113/290] PCI/P2PDMA: Fix build without DMA ops My commit to make DMA ops support optional missed the reference in the p2pdma code. And while the build bot didn't manage to find a config where this can happen, Matthew did. Fix this by replacing two IS_ENABLED checks with ifdefs. Fixes: 2f9237d4f6df ("dma-mapping: make support for dma ops optional") Link: https://lore.kernel.org/r/20200810124843.1532738-1-hch@lst.de Reported-by: Matthew Wilcox Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe --- drivers/pci/p2pdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 64ebed129dbf..f357f9a32b3a 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -556,13 +556,14 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, return -1; for (i = 0; i < num_clients; i++) { - if (IS_ENABLED(CONFIG_DMA_VIRT_OPS) && - clients[i]->dma_ops == &dma_virt_ops) { +#ifdef CONFIG_DMA_VIRT_OPS + if (clients[i]->dma_ops == &dma_virt_ops) { if (verbose) dev_warn(clients[i], "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n"); return -1; } +#endif pci_client = find_parent_pci_dev(clients[i]); if (!pci_client) { @@ -842,9 +843,10 @@ static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, * this should never happen because it will be prevented * by the check in pci_p2pdma_distance_many() */ - if (WARN_ON_ONCE(IS_ENABLED(CONFIG_DMA_VIRT_OPS) && - dev->dma_ops == &dma_virt_ops)) +#ifdef CONFIG_DMA_VIRT_OPS + if (WARN_ON_ONCE(dev->dma_ops == &dma_virt_ops)) return 0; +#endif for_each_sg(sg, s, nents, i) { paddr = sg_phys(s); From 2138d1c918246e3d8193c3cb8b6d22d0bb888061 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Thu, 6 Aug 2020 15:01:35 +0800 Subject: [PATCH 114/290] scsi: ufs: ti-j721e-ufs: Fix error return in ti_j721e_ufs_probe() Fix to return error code PTR_ERR() from the error handling case instead of 0. Link: https://lore.kernel.org/r/20200806070135.67797-1-jingxiangfeng@huawei.com Fixes: 22617e216331 ("scsi: ufs: ti-j721e-ufs: Fix unwinding of pm_runtime changes") Reviewed-by: Avri Altman Signed-off-by: Jing Xiangfeng Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ti-j721e-ufs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ufs/ti-j721e-ufs.c b/drivers/scsi/ufs/ti-j721e-ufs.c index 46bb905b4d6a..eafe0db98d54 100644 --- a/drivers/scsi/ufs/ti-j721e-ufs.c +++ b/drivers/scsi/ufs/ti-j721e-ufs.c @@ -38,6 +38,7 @@ static int ti_j721e_ufs_probe(struct platform_device *pdev) /* Select MPHY refclk frequency */ clk = devm_clk_get(dev, NULL); if (IS_ERR(clk)) { + ret = PTR_ERR(clk); dev_err(dev, "Cannot claim MPHY clock.\n"); goto clk_err; } From fa39ab5184d64563cd36f2fb5f0d3fbad83a432c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Aug 2020 15:23:33 -0500 Subject: [PATCH 115/290] scsi: fcoe: Fix I/O path allocation ixgbe_fcoe_ddp_setup() can be called from the main I/O path and is called with a spin_lock held, so we have to use GFP_ATOMIC allocation instead of GFP_KERNEL. Link: https://lore.kernel.org/r/1596831813-9839-1-git-send-email-michael.christie@oracle.com cc: Hannes Reinecke Reviewed-by: Lee Duncan Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index e67b1a59ecb7..0fcd82036d4e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -193,7 +193,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid, } /* alloc the udl from per cpu ddp pool */ - ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_KERNEL, &ddp->udp); + ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp); if (!ddp->udl) { e_err(drv, "failed allocated ddp context\n"); goto out_noddp_unmap; From 93b6c5db06028a3b55122bbb74d0715dd8ca4ae0 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Sun, 9 Aug 2020 13:07:34 +0800 Subject: [PATCH 116/290] scsi: ufs: Fix possible infinite loop in ufshcd_hold In ufshcd_suspend(), after clk-gating is suspended and link is set as Hibern8 state, ufshcd_hold() is still possibly invoked before ufshcd_suspend() returns. For example, MediaTek's suspend vops may issue UIC commands which would call ufshcd_hold() during the command issuing flow. Now if UFSHCD_CAP_HIBERN8_WITH_CLK_GATING capability is enabled, then ufshcd_hold() may enter infinite loops because there is no clk-ungating work scheduled or pending. In this case, ufshcd_hold() shall just bypass, and keep the link as Hibern8 state. Link: https://lore.kernel.org/r/20200809050734.18740-1-stanley.chu@mediatek.com Reviewed-by: Avri Altman Co-developed-by: Andy Teng Signed-off-by: Andy Teng Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 307622284239..b4f948027b3e 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1561,6 +1561,7 @@ static void ufshcd_ungate_work(struct work_struct *work) int ufshcd_hold(struct ufs_hba *hba, bool async) { int rc = 0; + bool flush_result; unsigned long flags; if (!ufshcd_is_clkgating_allowed(hba)) @@ -1592,7 +1593,9 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) break; } spin_unlock_irqrestore(hba->host->host_lock, flags); - flush_work(&hba->clk_gating.ungate_work); + flush_result = flush_work(&hba->clk_gating.ungate_work); + if (hba->clk_gating.is_suspended && !flush_result) + goto out; spin_lock_irqsave(hba->host->host_lock, flags); goto start; } From 215d326702516366a8dfa9624b67ba0761c22b86 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Sun, 9 Aug 2020 13:57:02 +0800 Subject: [PATCH 117/290] scsi: ufs-mediatek: Fix incorrect time to wait link status Fix incorrect calculation of "ms" based waiting time in function ufs_mtk_setup_clocks(). Link: https://lore.kernel.org/r/20200809055702.20140-1-stanley.chu@mediatek.com Fixes: 9006e3986f66 ("scsi: ufs-mediatek: Do not gate clocks if auto-hibern8 is not entered yet") Reviewed-by: Avri Altman Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 29cd017c1aa0..1755dd6b04ae 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -212,7 +212,7 @@ static int ufs_mtk_wait_link_state(struct ufs_hba *hba, u32 state, ktime_t timeout, time_checked; u32 val; - timeout = ktime_add_us(ktime_get(), ms_to_ktime(max_wait_ms)); + timeout = ktime_add_ms(ktime_get(), max_wait_ms); do { time_checked = ktime_get(); ufshcd_writel(hba, 0x20, REG_UFS_DEBUG_SEL); From 8da76f71fef7d8a1a72af09d48899573feb60065 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 10 Aug 2020 17:10:24 +0300 Subject: [PATCH 118/290] scsi: ufs-pci: Add quirk for broken auto-hibernate for Intel EHL Intel EHL UFS host controller advertises auto-hibernate capability but it does not work correctly. Add a quirk for that. [mkp: checkpatch fix] Link: https://lore.kernel.org/r/20200810141024.28859-1-adrian.hunter@intel.com Fixes: 8c09d7527697 ("scsi: ufshdc-pci: Add Intel PCI IDs for EHL") Acked-by: Stanley Chu Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 16 ++++++++++++++-- drivers/scsi/ufs/ufshcd.h | 9 ++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index f407b13883ac..5a95a7bfbab0 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -44,11 +44,23 @@ static int ufs_intel_link_startup_notify(struct ufs_hba *hba, return err; } +static int ufs_intel_ehl_init(struct ufs_hba *hba) +{ + hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; + return 0; +} + static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .name = "intel-pci", .link_startup_notify = ufs_intel_link_startup_notify, }; +static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = { + .name = "intel-pci", + .init = ufs_intel_ehl_init, + .link_startup_notify = ufs_intel_link_startup_notify, +}; + #ifdef CONFIG_PM_SLEEP /** * ufshcd_pci_suspend - suspend power management function @@ -177,8 +189,8 @@ static const struct dev_pm_ops ufshcd_pci_pm_ops = { static const struct pci_device_id ufshcd_pci_tbl[] = { { PCI_VENDOR_ID_SAMSUNG, 0xC00C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, { PCI_VDEVICE(INTEL, 0x9DFA), (kernel_ulong_t)&ufs_intel_cnl_hba_vops }, - { PCI_VDEVICE(INTEL, 0x4B41), (kernel_ulong_t)&ufs_intel_cnl_hba_vops }, - { PCI_VDEVICE(INTEL, 0x4B43), (kernel_ulong_t)&ufs_intel_cnl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x4B41), (kernel_ulong_t)&ufs_intel_ehl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x4B43), (kernel_ulong_t)&ufs_intel_ehl_hba_vops }, { } /* terminate list */ }; diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index b2ef18f1b746..363589c0bd37 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -520,6 +520,12 @@ enum ufshcd_quirks { * OCS FATAL ERROR with device error through sense data */ UFSHCD_QUIRK_BROKEN_OCS_FATAL_ERROR = 1 << 10, + + /* + * This quirk needs to be enabled if the host controller has + * auto-hibernate capability but it doesn't work. + */ + UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8 = 1 << 11, }; enum ufshcd_caps { @@ -803,7 +809,8 @@ return true; static inline bool ufshcd_is_auto_hibern8_supported(struct ufs_hba *hba) { - return (hba->capabilities & MASK_AUTO_HIBERN8_SUPPORT); + return (hba->capabilities & MASK_AUTO_HIBERN8_SUPPORT) && + !(hba->quirks & UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8); } static inline bool ufshcd_is_auto_hibern8_enabled(struct ufs_hba *hba) From 6337f58cec030b34ced435b3d9d7d29d63c96e36 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 11 Aug 2020 16:39:35 +0300 Subject: [PATCH 119/290] scsi: ufs: Fix interrupt error message for shared interrupts The interrupt might be shared, in which case it is not an error for the interrupt handler to be called when the interrupt status is zero, so don't print the message unless there was enabled interrupt status. Link: https://lore.kernel.org/r/20200811133936.19171-1-adrian.hunter@intel.com Fixes: 9333d7757348 ("scsi: ufs: Fix irq return code") Reviewed-by: Avri Altman Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index b4f948027b3e..22f801428a07 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5969,7 +5969,7 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); } while (intr_status && --retries); - if (retval == IRQ_NONE) { + if (enabled_intr_status && retval == IRQ_NONE) { dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x\n", __func__, intr_status); ufshcd_dump_regs(hba, 0, UFSHCI_REG_SPACE_SIZE, "host_regs: "); From 127d5f7c4b653b8be5eb3b2c7bbe13728f9003ff Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 11 Aug 2020 16:39:36 +0300 Subject: [PATCH 120/290] scsi: ufs: Improve interrupt handling for shared interrupts For shared interrupts, the interrupt status might be zero, so check that first. Link: https://lore.kernel.org/r/20200811133936.19171-2-adrian.hunter@intel.com Reviewed-by: Avri Altman Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 22f801428a07..0af3dd37ae6f 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5944,7 +5944,7 @@ static irqreturn_t ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status) */ static irqreturn_t ufshcd_intr(int irq, void *__hba) { - u32 intr_status, enabled_intr_status; + u32 intr_status, enabled_intr_status = 0; irqreturn_t retval = IRQ_NONE; struct ufs_hba *hba = __hba; int retries = hba->nutrs; @@ -5958,7 +5958,7 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) * read, make sure we handle them by checking the interrupt status * again in a loop until we process all of the reqs before returning. */ - do { + while (intr_status && retries--) { enabled_intr_status = intr_status & ufshcd_readl(hba, REG_INTERRUPT_ENABLE); if (intr_status) @@ -5967,7 +5967,7 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) retval |= ufshcd_sl_intr(hba, enabled_intr_status); intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); - } while (intr_status && --retries); + } if (enabled_intr_status && retval == IRQ_NONE) { dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x\n", From b10178ee7fa88b68a9e8adc06534d2605cb0ec23 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 11 Aug 2020 16:18:58 +0200 Subject: [PATCH 121/290] scsi: ufs: Clean up completed request without interrupt notification If somehow no interrupt notification is raised for a completed request and its doorbell bit is cleared by host, UFS driver needs to cleanup its outstanding bit in ufshcd_abort(). Otherwise, system may behave abnormally in the following scenario: After ufshcd_abort() returns, this request will be requeued by SCSI layer with its outstanding bit set. Any future completed request will trigger ufshcd_transfer_req_compl() to handle all "completed outstanding bits". At this time the "abnormal outstanding bit" will be detected and the "requeued request" will be chosen to execute request post-processing flow. This is wrong because this request is still "alive". Link: https://lore.kernel.org/r/20200811141859.27399-2-huobean@gmail.com Reviewed-by: Can Guo Acked-by: Avri Altman Signed-off-by: Stanley Chu Signed-off-by: Bean Huo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 0af3dd37ae6f..5d49361e7d88 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6495,7 +6495,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) /* command completed already */ dev_err(hba->dev, "%s: cmd at tag %d successfully cleared from DB.\n", __func__, tag); - goto out; + goto cleanup; } else { dev_err(hba->dev, "%s: no response from device. tag = %d, err %d\n", @@ -6529,6 +6529,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) goto out; } +cleanup: scsi_dma_unmap(cmd); spin_lock_irqsave(host->host_lock, flags); From d87a1f6d021f401f99ae2ba8ccddb87dac921308 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Tue, 11 Aug 2020 16:18:59 +0200 Subject: [PATCH 122/290] scsi: ufs: No need to send Abort Task if the task in DB was cleared If the bit corresponding to a task in the Doorbell register has been cleared, no need to poll the status of the task on the device side and to send an Abort Task TM. Instead, let it directly goto cleanup. In addition, to keep original debug output, move the goto below the debug print. Link: https://lore.kernel.org/r/20200811141859.27399-3-huobean@gmail.com Reviewed-by: Stanley Chu Reviewed-by: Can Guo Signed-off-by: Bean Huo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 5d49361e7d88..da199fa7a3e0 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6437,14 +6437,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) goto out; } - if (!(reg & (1 << tag))) { - dev_err(hba->dev, - "%s: cmd was completed, but without a notifying intr, tag = %d", - __func__, tag); - } - /* Print Transfer Request of aborted task */ - dev_err(hba->dev, "%s: Device abort task at tag %d\n", __func__, tag); + dev_info(hba->dev, "%s: Device abort task at tag %d\n", __func__, tag); /* * Print detailed info about aborted request. @@ -6465,6 +6459,13 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) } hba->req_abort_count++; + if (!(reg & (1 << tag))) { + dev_err(hba->dev, + "%s: cmd was completed, but without a notifying intr, tag = %d", + __func__, tag); + goto cleanup; + } + /* Skip task abort in case previous aborts failed and report failure */ if (lrbp->req_abort_skip) { err = -EIO; From 2d9a2c5f581be3991ba67fa9e7497c711220ea8e Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Thu, 13 Aug 2020 17:28:56 +0200 Subject: [PATCH 123/290] scsi: zfcp: Fix use-after-free in request timeout handlers Before v4.15 commit 75492a51568b ("s390/scsi: Convert timers to use timer_setup()"), we intentionally only passed zfcp_adapter as context argument to zfcp_fsf_request_timeout_handler(). Since we only trigger adapter recovery, it was unnecessary to sync against races between timeout and (late) completion. Likewise, we only passed zfcp_erp_action as context argument to zfcp_erp_timeout_handler(). Since we only wakeup an ERP action, it was unnecessary to sync against races between timeout and (late) completion. Meanwhile the timeout handlers get timer_list as context argument and do a timer-specific container-of to zfcp_fsf_req which can have been freed. Fix it by making sure that any request timeout handlers, that might just have started before del_timer(), are completed by using del_timer_sync() instead. This ensures the request free happens afterwards. Space time diagram of potential use-after-free: Basic idea is to have 2 or more pending requests whose timeouts run out at almost the same time. req 1 timeout ERP thread req 2 timeout ---------------- ---------------- --------------------------------------- zfcp_fsf_request_timeout_handler fsf_req = from_timer(fsf_req, t, timer) adapter = fsf_req->adapter zfcp_qdio_siosl(adapter) zfcp_erp_adapter_reopen(adapter,...) zfcp_erp_strategy ... zfcp_fsf_req_dismiss_all list_for_each_entry_safe zfcp_fsf_req_complete 1 del_timer 1 zfcp_fsf_req_free 1 zfcp_fsf_req_complete 2 zfcp_fsf_request_timeout_handler del_timer 2 fsf_req = from_timer(fsf_req, t, timer) zfcp_fsf_req_free 2 adapter = fsf_req->adapter ^^^^^^^ already freed Link: https://lore.kernel.org/r/20200813152856.50088-1-maier@linux.ibm.com Fixes: 75492a51568b ("s390/scsi: Convert timers to use timer_setup()") Cc: #4.15+ Suggested-by: Julian Wiedmann Reviewed-by: Julian Wiedmann Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fsf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index c795f22249d8..140186fe1d1e 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -434,7 +434,7 @@ static void zfcp_fsf_req_complete(struct zfcp_fsf_req *req) return; } - del_timer(&req->timer); + del_timer_sync(&req->timer); zfcp_fsf_protstatus_eval(req); zfcp_fsf_fsfstatus_eval(req); req->handler(req); @@ -867,7 +867,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) req->qdio_req.qdio_outb_usage = atomic_read(&qdio->req_q_free); req->issued = get_tod_clock(); if (zfcp_qdio_send(qdio, &req->qdio_req)) { - del_timer(&req->timer); + del_timer_sync(&req->timer); /* lookup request again, list might have changed */ zfcp_reqlist_find_rm(adapter->req_list, req_id); zfcp_erp_adapter_reopen(adapter, 0, "fsrs__1"); From 223f91b48079227f914657f07d2d686f7b60aa26 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Thu, 13 Aug 2020 11:57:38 -0400 Subject: [PATCH 124/290] scsi: scsi_debug: Fix scp is NULL errors John Garry reported 'sdebug_q_cmd_complete: scp is NULL' failures that were mainly seen on aarch64 machines (e.g. RPi 4 with four A72 CPUs). The problem was tracked down to a missing critical section on a "short circuit" path. Namely, the time to process the current command so far has already exceeded the requested command duration (i.e. the number of nanoseconds in the ndelay parameter). The random=1 parameter setting was pivotal in finding this error. The failure scenario involved first taking that "short circuit" path (due to a very short command duration) and then taking the more likely hrtimer_start() path (due to a longer command duration). With random=1 each command's duration is taken from the uniformly distributed [0..ndelay) interval. The fio utility also helped by reliably generating the error scenario at about once per minute on a RPi 4 (64 bit OS). Link: https://lore.kernel.org/r/20200813155738.109298-1-dgilbert@interlog.com Reported-by: John Garry Reviewed-by: Lee Duncan Signed-off-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 064ed680c053..139f0073da37 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -5490,9 +5490,11 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, u64 d = ktime_get_boottime_ns() - ns_from_boot; if (kt <= d) { /* elapsed duration >= kt */ + spin_lock_irqsave(&sqp->qc_lock, iflags); sqcp->a_cmnd = NULL; atomic_dec(&devip->num_in_q); clear_bit(k, sqp->in_use_bm); + spin_unlock_irqrestore(&sqp->qc_lock, iflags); if (new_sd_dp) kfree(sd_dp); /* call scsi_done() from this thread */ From c314a014b1802d30f1dc50db18b2e7f5a77d19c6 Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Wed, 5 Aug 2020 17:05:46 -0300 Subject: [PATCH 125/290] scsi: qla2xxx: Use MBX_TOV_SECONDS for mailbox command timeout values Improves readability of qla_mbx.c. Link: https://lore.kernel.org/r/20200805200546.22497-1-ematsumiya@suse.de Reviewed-by: Himanshu Madhani Reviewed-by: Roman Bolshakov Signed-off-by: Enzo Matsumiya Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_mbx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 73883435ab58..77e1e24125ca 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -5240,7 +5240,7 @@ qla2x00_read_ram_word(scsi_qla_host_t *vha, uint32_t risc_addr, uint32_t *data) mcp->mb[8] = MSW(risc_addr); mcp->out_mb = MBX_8|MBX_1|MBX_0; mcp->in_mb = MBX_3|MBX_2|MBX_0; - mcp->tov = 30; + mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); if (rval != QLA_SUCCESS) { @@ -5428,7 +5428,7 @@ qla2x00_write_ram_word(scsi_qla_host_t *vha, uint32_t risc_addr, uint32_t data) mcp->mb[8] = MSW(risc_addr); mcp->out_mb = MBX_8|MBX_3|MBX_2|MBX_1|MBX_0; mcp->in_mb = MBX_1|MBX_0; - mcp->tov = 30; + mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); if (rval != QLA_SUCCESS) { @@ -5700,7 +5700,7 @@ qla24xx_set_fcp_prio(scsi_qla_host_t *vha, uint16_t loop_id, uint16_t priority, mcp->mb[9] = vha->vp_idx; mcp->out_mb = MBX_9|MBX_4|MBX_3|MBX_2|MBX_1|MBX_0; mcp->in_mb = MBX_4|MBX_3|MBX_1|MBX_0; - mcp->tov = 30; + mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); if (mb != NULL) { @@ -5787,7 +5787,7 @@ qla82xx_mbx_intr_enable(scsi_qla_host_t *vha) mcp->out_mb = MBX_1|MBX_0; mcp->in_mb = MBX_0; - mcp->tov = 30; + mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); @@ -5822,7 +5822,7 @@ qla82xx_mbx_intr_disable(scsi_qla_host_t *vha) mcp->out_mb = MBX_1|MBX_0; mcp->in_mb = MBX_0; - mcp->tov = 30; + mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); @@ -6014,7 +6014,7 @@ qla81xx_set_led_config(scsi_qla_host_t *vha, uint16_t *led_cfg) if (IS_QLA8031(ha)) mcp->out_mb |= MBX_6|MBX_5|MBX_4|MBX_3; mcp->in_mb = MBX_0; - mcp->tov = 30; + mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); @@ -6050,7 +6050,7 @@ qla81xx_get_led_config(scsi_qla_host_t *vha, uint16_t *led_cfg) mcp->in_mb = MBX_2|MBX_1|MBX_0; if (IS_QLA8031(ha)) mcp->in_mb |= MBX_6|MBX_5|MBX_4|MBX_3; - mcp->tov = 30; + mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); From 10ae30ba664822f62de169a61628e31c999c7cc8 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 6 Aug 2020 04:10:04 -0700 Subject: [PATCH 126/290] scsi: qla2xxx: Flush all sessions on zone disable On Zone Disable, certain switches would ignore all commands. This causes timeout for both switch scan command and abort of that command. On detection of this condition, all sessions will be shutdown. Link: https://lore.kernel.org/r/20200806111014.28434-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_gs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index df670fba2ab8..c6b6a3250312 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3736,6 +3736,18 @@ static void qla2x00_async_gpnft_gnnft_sp_done(srb_t *sp, int res) unsigned long flags; const char *name = sp->name; + if (res == QLA_OS_TIMER_EXPIRED) { + /* switch is ignoring all commands. + * This might be a zone disable behavior. + * This means we hit 64s timeout. + * 22s GPNFT + 44s Abort = 64s + */ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s: Switch Zone check please .\n", + name); + qla2x00_mark_all_devices_lost(vha); + } + /* * We are in an Interrupt context, queue up this * sp for GNNFT_DONE work. This will allow all From a117579d0205b5a0592a3a98493e2b875e4da236 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 6 Aug 2020 04:10:05 -0700 Subject: [PATCH 127/290] scsi: qla2xxx: Flush I/O on zone disable Perform implicit logout to flush I/O on zone disable. Link: https://lore.kernel.org/r/20200806111014.28434-3-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_gs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index c6b6a3250312..707407344670 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3436,7 +3436,6 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp) list_for_each_entry(fcport, &vha->vp_fcports, list) { if ((fcport->flags & FCF_FABRIC_DEVICE) != 0) { fcport->scan_state = QLA_FCPORT_SCAN; - fcport->logout_on_delete = 0; } } goto login_logout; From 4709272f6327cc4a8ee1dc55771bcf9718346980 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 6 Aug 2020 04:10:06 -0700 Subject: [PATCH 128/290] scsi: qla2xxx: Indicate correct supported speeds for Mezz card Correct the supported speeds for 16G Mezz card. Link: https://lore.kernel.org/r/20200806111014.28434-4-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_gs.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 707407344670..3bfb5678f651 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -1505,11 +1505,11 @@ qla2x00_prep_ct_fdmi_req(struct ct_sns_pkt *p, uint16_t cmd, static uint qla25xx_fdmi_port_speed_capability(struct qla_hw_data *ha) { + uint speeds = 0; + if (IS_CNA_CAPABLE(ha)) return FDMI_PORT_SPEED_10GB; if (IS_QLA28XX(ha) || IS_QLA27XX(ha)) { - uint speeds = 0; - if (ha->max_supported_speed == 2) { if (ha->min_supported_speed <= 6) speeds |= FDMI_PORT_SPEED_64GB; @@ -1536,9 +1536,16 @@ qla25xx_fdmi_port_speed_capability(struct qla_hw_data *ha) } return speeds; } - if (IS_QLA2031(ha)) - return FDMI_PORT_SPEED_16GB|FDMI_PORT_SPEED_8GB| - FDMI_PORT_SPEED_4GB; + if (IS_QLA2031(ha)) { + if ((ha->pdev->subsystem_vendor == 0x103C) && + (ha->pdev->subsystem_device == 0x8002)) { + speeds = FDMI_PORT_SPEED_16GB; + } else { + speeds = FDMI_PORT_SPEED_16GB|FDMI_PORT_SPEED_8GB| + FDMI_PORT_SPEED_4GB; + } + return speeds; + } if (IS_QLA25XX(ha)) return FDMI_PORT_SPEED_8GB|FDMI_PORT_SPEED_4GB| FDMI_PORT_SPEED_2GB|FDMI_PORT_SPEED_1GB; From abb31aeaa9b20680b0620b23fea5475ea4591e31 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 6 Aug 2020 04:10:07 -0700 Subject: [PATCH 129/290] scsi: qla2xxx: Fix login timeout Multipath errors were seen during failback due to login timeout. The remote device sent LOGO, the local host tore down the session and did relogin. The RSCN arrived indicates remote device is going through failover after which the relogin is in a 20s timeout phase. At this point the driver is stuck in the relogin process. Add a fix to delete the session as part of abort/flush the login. Link: https://lore.kernel.org/r/20200806111014.28434-5-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_gs.c | 18 +++++++++++++++--- drivers/scsi/qla2xxx/qla_target.c | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 3bfb5678f651..de9fd7f688d0 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3538,10 +3538,22 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp) } if (fcport->scan_state != QLA_FCPORT_FOUND) { + bool do_delete = false; + + if (fcport->scan_needed && + fcport->disc_state == DSC_LOGIN_PEND) { + /* Cable got disconnected after we sent + * a login. Do delete to prevent timeout. + */ + fcport->logout_on_delete = 1; + do_delete = true; + } + fcport->scan_needed = 0; - if ((qla_dual_mode_enabled(vha) || - qla_ini_mode_enabled(vha)) && - atomic_read(&fcport->state) == FCS_ONLINE) { + if (((qla_dual_mode_enabled(vha) || + qla_ini_mode_enabled(vha)) && + atomic_read(&fcport->state) == FCS_ONLINE) || + do_delete) { if (fcport->loop_id != FC_NO_LOOP_ID) { if (fcport->flags & FCF_FCP2_DEVICE) fcport->logout_on_delete = 0; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index fbb80a043b4f..90289162dbd4 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1270,7 +1270,7 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) qla24xx_chk_fcp_state(sess); - ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, + ql_dbg(ql_dbg_disc, sess->vha, 0xe001, "Scheduling sess %p for deletion %8phC\n", sess, sess->port_name); From 81b9d1e19d62bf876c3985dbaf53a3a50eedd74b Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 6 Aug 2020 04:10:08 -0700 Subject: [PATCH 130/290] scsi: qla2xxx: Reduce noisy debug message Update debug level and message for ELS IOCB done. Link: https://lore.kernel.org/r/20200806111014.28434-6-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 27bcd346af7c..ab5275dbc338 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2024,8 +2024,8 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, res = DID_ERROR << 16; } } - ql_dbg(ql_dbg_user, vha, 0x503f, - "ELS IOCB Done -%s error hdl=%x comp_status=0x%x error subcode 1=0x%x error subcode 2=0x%x total_byte=0x%x\n", + ql_dbg(ql_dbg_disc, vha, 0x503f, + "ELS IOCB Done -%s hdl=%x comp_status=0x%x error subcode 1=0x%x error subcode 2=0x%x total_byte=0x%x\n", type, sp->handle, comp_status, fw_status[1], fw_status[2], le32_to_cpu(ese->total_byte_count)); goto els_ct_done; From 49030003a38a5e3688781e2ccf9550dcebf61282 Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Thu, 6 Aug 2020 04:10:09 -0700 Subject: [PATCH 131/290] scsi: qla2xxx: Allow ql2xextended_error_logging special value 1 to be set anytime ql2xextended_error_logging can now be set to 1 to get the default mask value, as opposed to at module load time only. Link: https://lore.kernel.org/r/20200806111014.28434-7-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Arun Easi Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index 91eb6901815c..e1d7de63e8f8 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -380,5 +380,8 @@ extern int qla24xx_soft_reset(struct qla_hw_data *); static inline int ql_mask_match(uint level) { + if (ql2xextended_error_logging == 1) + ql2xextended_error_logging = QL_DBG_DEFAULT1_MASK; + return (level & ql2xextended_error_logging) == level; } From 897d68eb816bfae5ad9e870f68350dbb599d6e0e Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Thu, 6 Aug 2020 04:10:10 -0700 Subject: [PATCH 132/290] scsi: qla2xxx: Fix WARN_ON in qla_nvme_register_hba qla_nvme_register_hba() puts out a warning when there are not enough queue pairs available for FC-NVME. Just fail the NVME registration rather than a WARNING + call Trace. Link: https://lore.kernel.org/r/20200806111014.28434-8-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Arun Easi Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_nvme.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 8c92af5e4390..1bc090d8a71b 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3880,6 +3880,7 @@ struct qla_hw_data { uint32_t scm_supported_f:1; /* Enabled in Driver */ uint32_t scm_enabled:1; + uint32_t max_req_queue_warned:1; } flags; uint16_t max_exchg; diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index fa695a4007f8..2722ffc1788f 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -687,7 +687,15 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha) tmpl = &qla_nvme_fc_transport; WARN_ON(vha->nvme_local_port); - WARN_ON(ha->max_req_queues < 3); + + if (ha->max_req_queues < 3) { + if (!ha->flags.max_req_queue_warned) + ql_log(ql_log_info, vha, 0x2120, + "%s: Disabling FC-NVME due to lack of free queue pairs (%d).\n", + __func__, ha->max_req_queues); + ha->flags.max_req_queue_warned = 1; + return ret; + } qla_nvme_fc_transport.max_hw_queues = min((uint8_t)(qla_nvme_fc_transport.max_hw_queues), From dffa11453313a115157b19021cc2e27ea98e624c Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 6 Aug 2020 04:10:11 -0700 Subject: [PATCH 133/290] scsi: qla2xxx: Check if FW supports MQ before enabling OS boot during Boot from SAN was stuck at dracut emergency shell after enabling NVMe driver parameter. For non-MQ support the driver was enabling MQ. Add a check to confirm if FW supports MQ. Link: https://lore.kernel.org/r/20200806111014.28434-9-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 9b59f032a569..fda812b9b564 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2017,6 +2017,11 @@ qla2x00_iospace_config(struct qla_hw_data *ha) /* Determine queue resources */ ha->max_req_queues = ha->max_rsp_queues = 1; ha->msix_count = QLA_BASE_VECTORS; + + /* Check if FW supports MQ or not */ + if (!(ha->fw_attributes & BIT_6)) + goto mqiobase_exit; + if (!ql2xmqsupport || !ql2xnvmeenable || (!IS_QLA25XX(ha) && !IS_QLA81XX(ha))) goto mqiobase_exit; From 83949613fac61e8e37eadf8275bf072342302f4e Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 6 Aug 2020 04:10:12 -0700 Subject: [PATCH 134/290] scsi: qla2xxx: Fix null pointer access during disconnect from subsystem NVMEAsync command is being submitted to QLA while the same NVMe controller is in the middle of reset. The reset path has deleted the association and freed aen_op->fcp_req.private. Add a check for this private pointer before issuing the command. ... 6 [ffffb656ca11fce0] page_fault at ffffffff8c00114e [exception RIP: qla_nvme_post_cmd+394] RIP: ffffffffc0d012ba RSP: ffffb656ca11fd98 RFLAGS: 00010206 RAX: ffff8fb039eda228 RBX: ffff8fb039eda200 RCX: 00000000000da161 RDX: ffffffffc0d4d0f0 RSI: ffffffffc0d26c9b RDI: ffff8fb039eda220 RBP: 0000000000000013 R8: ffff8fb47ff6aa80 R9: 0000000000000002 R10: 0000000000000000 R11: ffffb656ca11fdc8 R12: ffff8fb27d04a3b0 R13: ffff8fc46dd98a58 R14: 0000000000000000 R15: ffff8fc4540f0000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 7 [ffffb656ca11fe08] nvme_fc_start_fcp_op at ffffffffc0241568 [nvme_fc] 8 [ffffb656ca11fe50] nvme_fc_submit_async_event at ffffffffc0241901 [nvme_fc] 9 [ffffb656ca11fe68] nvme_async_event_work at ffffffffc014543d [nvme_core] 10 [ffffb656ca11fe98] process_one_work at ffffffff8b6cd437 11 [ffffb656ca11fed8] worker_thread at ffffffff8b6cdcef 12 [ffffb656ca11ff10] kthread at ffffffff8b6d3402 13 [ffffb656ca11ff50] ret_from_fork at ffffffff8c000255 -- PID: 37824 TASK: ffff8fb033063d80 CPU: 20 COMMAND: "kworker/u97:451" 0 [ffffb656ce1abc28] __schedule at ffffffff8be629e3 1 [ffffb656ce1abcc8] schedule at ffffffff8be62fe8 2 [ffffb656ce1abcd0] schedule_timeout at ffffffff8be671ed 3 [ffffb656ce1abd70] wait_for_completion at ffffffff8be639cf 4 [ffffb656ce1abdd0] flush_work at ffffffff8b6ce2d5 5 [ffffb656ce1abe70] nvme_stop_ctrl at ffffffffc0144900 [nvme_core] 6 [ffffb656ce1abe80] nvme_fc_reset_ctrl_work at ffffffffc0243445 [nvme_fc] 7 [ffffb656ce1abe98] process_one_work at ffffffff8b6cd437 8 [ffffb656ce1abed8] worker_thread at ffffffff8b6cdb50 9 [ffffb656ce1abf10] kthread at ffffffff8b6d3402 10 [ffffb656ce1abf50] ret_from_fork at ffffffff8c000255 Link: https://lore.kernel.org/r/20200806111014.28434-10-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_nvme.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 2722ffc1788f..90bbc61f361b 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -536,6 +536,11 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, struct nvme_private *priv = fd->private; struct qla_nvme_rport *qla_rport = rport->private; + if (!priv) { + /* nvme association has been torn down */ + return rval; + } + fcport = qla_rport->fcport; if (!qpair || !fcport || (qpair && !qpair->fw_started) || From de7e6194301ad31c4ce95395eb678e51a1b907e5 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 6 Aug 2020 04:10:13 -0700 Subject: [PATCH 135/290] Revert "scsi: qla2xxx: Fix crash on qla2x00_mailbox_command" FCoE adapter initialization failed for ISP8021 with the following patch applied. In addition, reproduction of the issue the patch originally tried to address has been unsuccessful. This reverts commit 3cb182b3fa8b7a61f05c671525494697cba39c6a. Link: https://lore.kernel.org/r/20200806111014.28434-11-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_mbx.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 77e1e24125ca..226f1428d3e5 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -334,14 +334,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) if (time_after(jiffies, wait_time)) break; - /* - * Check if it's UNLOADING, cause we cannot poll in - * this case, or else a NULL pointer dereference - * is triggered. - */ - if (unlikely(test_bit(UNLOADING, &base_vha->dpc_flags))) - return QLA_FUNCTION_TIMEOUT; - /* Check for pending interrupts. */ qla2x00_poll(ha->rsp_q_map[0]); From dca93232b361d260413933903cd4bdbd92ebcc7f Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 6 Aug 2020 04:10:14 -0700 Subject: [PATCH 136/290] Revert "scsi: qla2xxx: Disable T10-DIF feature with FC-NVMe during probe" FCP T10-PI and NVMe features are independent of each other. This patch allows both features to co-exist. This reverts commit 5da05a26b8305a625bc9d537671b981795b46dab. Link: https://lore.kernel.org/r/20200806111014.28434-12-njavali@marvell.com Fixes: 5da05a26b830 ("scsi: qla2xxx: Disable T10-DIF feature with FC-NVMe during probe") Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index fda812b9b564..8da00ba54aec 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2834,10 +2834,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) /* This may fail but that's ok */ pci_enable_pcie_error_reporting(pdev); - /* Turn off T10-DIF when FC-NVMe is enabled */ - if (ql2xnvmeenable) - ql2xenabledif = 0; - ha = kzalloc(sizeof(struct qla_hw_data), GFP_KERNEL); if (!ha) { ql_log_pci(ql_log_fatal, pdev, 0x0009, From 48d2f0407be7a36e8f20be37ec9121e021ef3964 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 5 Aug 2020 15:27:29 +0000 Subject: [PATCH 137/290] powerpc/kasan: Fix KASAN_SHADOW_START on BOOK3S_32 On BOOK3S_32, when we have modules and strict kernel RWX, modules are not in vmalloc space but in a dedicated segment that is below PAGE_OFFSET. So KASAN_SHADOW_START must take it into account. MODULES_VADDR can't be used because it is not defined yet in kasan.h Fixes: 6ca055322da8 ("powerpc/32s: Use dedicated segment for modules with STRICT_KERNEL_RWX") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6eddca2d5611fd57312a88eae31278c87a8fc99d.1596641224.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kasan.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index d635b96c7ea6..7355ed05e65e 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -15,11 +15,18 @@ #ifndef __ASSEMBLY__ #include +#include #define KASAN_SHADOW_SCALE_SHIFT 3 +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX) +#define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) +#else +#define KASAN_KERN_START PAGE_OFFSET +#endif + #define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ - (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT)) + (KASAN_KERN_START >> KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) From 7bee31ad8e2f6c276f36993346ac70f4d4c80e45 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 5 Aug 2020 15:27:28 +0000 Subject: [PATCH 138/290] powerpc/32s: Fix is_module_segment() when MODULES_VADDR is defined When MODULES_VADDR is defined, is_module_segment() shall check the address against it instead of checking agains VMALLOC_START. Fixes: 6ca055322da8 ("powerpc/32s: Use dedicated segment for modules with STRICT_KERNEL_RWX") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/07884ed033c31e074747b7eb8eaa329d15db07ec.1596641219.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index c0162911f6cb..82ae9e06a773 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -191,10 +191,17 @@ static bool is_module_segment(unsigned long addr) { if (!IS_ENABLED(CONFIG_MODULES)) return false; +#ifdef MODULES_VADDR + if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) + return false; + if (addr >= ALIGN(MODULES_END, SZ_256M)) + return false; +#else if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) return false; if (addr >= ALIGN(VMALLOC_END, SZ_256M)) return false; +#endif return true; } From 801980f6497946048709b9b09771a1729551d705 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 11 Aug 2020 11:15:44 -0500 Subject: [PATCH 139/290] powerpc/pseries/hotplug-cpu: wait indefinitely for vCPU death For a power9 KVM guest with XIVE enabled, running a test loop where we hotplug 384 vcpus and then unplug them, the following traces can be seen (generally within a few loops) either from the unplugged vcpu: cpu 65 (hwid 65) Ready to die... Querying DEAD? cpu 66 (66) shows 2 list_del corruption. next->prev should be c00a000002470208, but was c00a000002470048 ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:56! Oops: Exception in kernel mode, sig: 5 [#1] LE SMP NR_CPUS=2048 NUMA pSeries Modules linked in: fuse nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 ... CPU: 66 PID: 0 Comm: swapper/66 Kdump: loaded Not tainted 4.18.0-221.el8.ppc64le #1 NIP: c0000000007ab50c LR: c0000000007ab508 CTR: 00000000000003ac REGS: c0000009e5a17840 TRAP: 0700 Not tainted (4.18.0-221.el8.ppc64le) MSR: 800000000282b033 CR: 28000842 XER: 20040000 ... NIP __list_del_entry_valid+0xac/0x100 LR __list_del_entry_valid+0xa8/0x100 Call Trace: __list_del_entry_valid+0xa8/0x100 (unreliable) free_pcppages_bulk+0x1f8/0x940 free_unref_page+0xd0/0x100 xive_spapr_cleanup_queue+0x148/0x1b0 xive_teardown_cpu+0x1bc/0x240 pseries_mach_cpu_die+0x78/0x2f0 cpu_die+0x48/0x70 arch_cpu_idle_dead+0x20/0x40 do_idle+0x2f4/0x4c0 cpu_startup_entry+0x38/0x40 start_secondary+0x7bc/0x8f0 start_secondary_prolog+0x10/0x14 or on the worker thread handling the unplug: pseries-hotplug-cpu: Attempting to remove CPU , drc index: 1000013a Querying DEAD? cpu 314 (314) shows 2 BUG: Bad page state in process kworker/u768:3 pfn:95de1 cpu 314 (hwid 314) Ready to die... page:c00a000002577840 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0x5ffffc00000000() raw: 005ffffc00000000 5deadbeef0000100 5deadbeef0000200 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffff7f 0000000000000000 page dumped because: nonzero mapcount Modules linked in: kvm xt_CHECKSUM ipt_MASQUERADE xt_conntrack ... CPU: 0 PID: 548 Comm: kworker/u768:3 Kdump: loaded Not tainted 4.18.0-224.el8.bz1856588.ppc64le #1 Workqueue: pseries hotplug workque pseries_hp_work_fn Call Trace: dump_stack+0xb0/0xf4 (unreliable) bad_page+0x12c/0x1b0 free_pcppages_bulk+0x5bc/0x940 page_alloc_cpu_dead+0x118/0x120 cpuhp_invoke_callback.constprop.5+0xb8/0x760 _cpu_down+0x188/0x340 cpu_down+0x5c/0xa0 cpu_subsys_offline+0x24/0x40 device_offline+0xf0/0x130 dlpar_offline_cpu+0x1c4/0x2a0 dlpar_cpu_remove+0xb8/0x190 dlpar_cpu_remove_by_index+0x12c/0x150 dlpar_cpu+0x94/0x800 pseries_hp_work_fn+0x128/0x1e0 process_one_work+0x304/0x5d0 worker_thread+0xcc/0x7a0 kthread+0x1ac/0x1c0 ret_from_kernel_thread+0x5c/0x80 The latter trace is due to the following sequence: page_alloc_cpu_dead drain_pages drain_pages_zone free_pcppages_bulk where drain_pages() in this case is called under the assumption that the unplugged cpu is no longer executing. To ensure that is the case, and early call is made to __cpu_die()->pseries_cpu_die(), which runs a loop that waits for the cpu to reach a halted state by polling its status via query-cpu-stopped-state RTAS calls. It only polls for 25 iterations before giving up, however, and in the trace above this results in the following being printed only .1 seconds after the hotplug worker thread begins processing the unplug request: pseries-hotplug-cpu: Attempting to remove CPU , drc index: 1000013a Querying DEAD? cpu 314 (314) shows 2 At that point the worker thread assumes the unplugged CPU is in some unknown/dead state and procedes with the cleanup, causing the race with the XIVE cleanup code executed by the unplugged CPU. Fix this by waiting indefinitely, but also making an effort to avoid spurious lockup messages by allowing for rescheduling after polling the CPU status and printing a warning if we wait for longer than 120s. Fixes: eac1e731b59ee ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Suggested-by: Michael Ellerman Signed-off-by: Michael Roth Tested-by: Greg Kurz Reviewed-by: Thiago Jung Bauermann Reviewed-by: Greg Kurz [mpe: Trim oopses in change log slightly for readability] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200811161544.10513-1-mdroth@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index c6e0d8abf75e..7a974ed6b240 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -107,22 +107,28 @@ static int pseries_cpu_disable(void) */ static void pseries_cpu_die(unsigned int cpu) { - int tries; int cpu_status = 1; unsigned int pcpu = get_hard_smp_processor_id(cpu); + unsigned long timeout = jiffies + msecs_to_jiffies(120000); - for (tries = 0; tries < 25; tries++) { + while (true) { cpu_status = smp_query_cpu_stopped(pcpu); if (cpu_status == QCSS_STOPPED || cpu_status == QCSS_HARDWARE_ERROR) break; - cpu_relax(); + if (time_after(jiffies, timeout)) { + pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n", + cpu, pcpu); + timeout = jiffies + msecs_to_jiffies(120000); + } + + cond_resched(); } - if (cpu_status != 0) { - printk("Querying DEAD? cpu %i (%i) shows %i\n", - cpu, pcpu, cpu_status); + if (cpu_status == QCSS_HARDWARE_ERROR) { + pr_warn("CPU %i (hwid %i) reported error while dying\n", + cpu, pcpu); } /* Isolation and deallocation are definitely done by From 1dbcf46d516b3ab9b114ffcb7b8a55ee5e375523 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 29 Jul 2020 12:18:45 +0900 Subject: [PATCH 140/290] extract-cert: add static to local data Fix the following warning from sparse: scripts/extract-cert.c:74:5: warning: symbol 'kbuild_verbose' was not declared. Should it be static? Signed-off-by: Masahiro Yamada --- scripts/extract-cert.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c index b071bf476fea..3bc48c726c41 100644 --- a/scripts/extract-cert.c +++ b/scripts/extract-cert.c @@ -71,7 +71,7 @@ static void drain_openssl_errors(void) static const char *key_pass; static BIO *wb; static char *cert_dst; -int kbuild_verbose; +static int kbuild_verbose; static void write_cert(X509 *x509) { From efbf97265f9fc5e1978df6cf0e3df4d5b157f070 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 12 Aug 2020 23:49:23 +0900 Subject: [PATCH 141/290] kbuild: remove redundant patterns in filter/filter-out The '%' in filter/filter-out matches to any number of any characters, including empty string. So, '%config' matches to 'config', and '%install' to 'install'. Drop the redundant patterns. Signed-off-by: Masahiro Yamada --- Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 9cac6fde3479..c4470a4e131f 100644 --- a/Makefile +++ b/Makefile @@ -265,8 +265,7 @@ no-dot-config-targets := $(clean-targets) \ $(version_h) headers headers_% archheaders archscripts \ %asm-generic kernelversion %src-pkg dt_binding_check \ outputmakefile -no-sync-config-targets := $(no-dot-config-targets) install %install \ - kernelrelease +no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.s %.symtypes %/ config-build := @@ -292,7 +291,7 @@ ifneq ($(KBUILD_EXTMOD),) endif ifeq ($(KBUILD_EXTMOD),) - ifneq ($(filter config %config,$(MAKECMDGOALS)),) + ifneq ($(filter %config,$(MAKECMDGOALS)),) config-build := 1 ifneq ($(words $(MAKECMDGOALS)),1) mixed-build := 1 From e29a6d633e2782ec0278b8c199135ad24a99240d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 14 Aug 2020 18:38:15 -0700 Subject: [PATCH 142/290] genksyms: keywords: Use __restrict not _restrict Use the proper form of the RESTRICT keyword. Quote the comments properly too. Signed-off-by: Joe Perches Acked-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- scripts/genksyms/keywords.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/genksyms/keywords.c b/scripts/genksyms/keywords.c index 7a85c4e21175..057c6cabad1d 100644 --- a/scripts/genksyms/keywords.c +++ b/scripts/genksyms/keywords.c @@ -25,9 +25,9 @@ static struct resword { { "__int128_t", BUILTIN_INT_KEYW }, { "__uint128_t", BUILTIN_INT_KEYW }, - // According to rth, c99 defines "_Bool", __restrict", __restrict__", "restrict". KAO + // According to rth, c99 defines "_Bool", "__restrict", "__restrict__", "restrict". KAO { "_Bool", BOOL_KEYW }, - { "_restrict", RESTRICT_KEYW }, + { "__restrict", RESTRICT_KEYW }, { "__restrict__", RESTRICT_KEYW }, { "restrict", RESTRICT_KEYW }, { "asm", ASM_KEYW }, From d85de3399f97467baa2026fbbbe587850d01ba8a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 18 Aug 2020 01:36:29 +0900 Subject: [PATCH 143/290] kconfig: qconf: fix signal connection to invalid slots If you right-click in the ConfigList window, you will see the following messages in the console: QObject::connect: No such slot QAction::setOn(bool) in scripts/kconfig/qconf.cc:888 QObject::connect: (sender name: 'config') QObject::connect: No such slot QAction::setOn(bool) in scripts/kconfig/qconf.cc:897 QObject::connect: (sender name: 'config') QObject::connect: No such slot QAction::setOn(bool) in scripts/kconfig/qconf.cc:906 QObject::connect: (sender name: 'config') Right, there is no such slot in QAction. I think this is a typo of setChecked. Due to this bug, when you toggled the menu "Option->Show Name/Range/Data" the state of the context menu was not previously updated. Fix this. Fixes: d5d973c3f8a9 ("Port xconfig to Qt5 - Put back some of the old implementation(part 2)") Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index bc390df49f1f..c1812563b818 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -885,7 +885,7 @@ void ConfigList::contextMenuEvent(QContextMenuEvent *e) connect(action, SIGNAL(toggled(bool)), parent(), SLOT(setShowName(bool))); connect(parent(), SIGNAL(showNameChanged(bool)), - action, SLOT(setOn(bool))); + action, SLOT(setChecked(bool))); action->setChecked(showName); headerPopup->addAction(action); @@ -894,7 +894,7 @@ void ConfigList::contextMenuEvent(QContextMenuEvent *e) connect(action, SIGNAL(toggled(bool)), parent(), SLOT(setShowRange(bool))); connect(parent(), SIGNAL(showRangeChanged(bool)), - action, SLOT(setOn(bool))); + action, SLOT(setChecked(bool))); action->setChecked(showRange); headerPopup->addAction(action); @@ -903,7 +903,7 @@ void ConfigList::contextMenuEvent(QContextMenuEvent *e) connect(action, SIGNAL(toggled(bool)), parent(), SLOT(setShowData(bool))); connect(parent(), SIGNAL(showDataChanged(bool)), - action, SLOT(setOn(bool))); + action, SLOT(setChecked(bool))); action->setChecked(showData); headerPopup->addAction(action); } @@ -1275,7 +1275,7 @@ QMenu* ConfigInfoView::createStandardContextMenu(const QPoint & pos) action->setCheckable(true); connect(action, SIGNAL(toggled(bool)), SLOT(setShowDebug(bool))); - connect(this, SIGNAL(showDebugChanged(bool)), action, SLOT(setOn(bool))); + connect(this, SIGNAL(showDebugChanged(bool)), action, SLOT(setChecked(bool))); action->setChecked(showDebug()); popup->addSeparator(); popup->addAction(action); From 7d1300e63161d05735ebadf9b9d8bc89afe49d1c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 18 Aug 2020 01:36:30 +0900 Subject: [PATCH 144/290] kconfig: qconf: fix the popup menu in the ConfigInfoView window I do not know when ConfigInfoView::createStandardContextMenu() is called. Because QTextEdit::createStandardContextMenu() is not virtual, ConfigInfoView::createStandardContextMenu() cannot override it. Even if right-click the ConfigInfoView window, the "Show Debug Info" menu does not show up. Build up the menu in the constructor, and invoke it from the contextMenuEvent(). Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 29 +++++++++++++---------------- scripts/kconfig/qconf.h | 4 ++-- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index c1812563b818..5a0aa159ec80 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1012,6 +1012,16 @@ ConfigInfoView::ConfigInfoView(QWidget* parent, const char *name) configSettings->endGroup(); connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); } + + contextMenu = createStandardContextMenu(); + QAction *action = new QAction("Show Debug Info", contextMenu); + + action->setCheckable(true); + connect(action, SIGNAL(toggled(bool)), SLOT(setShowDebug(bool))); + connect(this, SIGNAL(showDebugChanged(bool)), action, SLOT(setChecked(bool))); + action->setChecked(showDebug()); + contextMenu->addSeparator(); + contextMenu->addAction(action); } void ConfigInfoView::saveSettings(void) @@ -1268,23 +1278,10 @@ void ConfigInfoView::clicked(const QUrl &url) delete data; } -QMenu* ConfigInfoView::createStandardContextMenu(const QPoint & pos) +void ConfigInfoView::contextMenuEvent(QContextMenuEvent *event) { - QMenu* popup = Parent::createStandardContextMenu(pos); - QAction* action = new QAction("Show Debug Info", popup); - - action->setCheckable(true); - connect(action, SIGNAL(toggled(bool)), SLOT(setShowDebug(bool))); - connect(this, SIGNAL(showDebugChanged(bool)), action, SLOT(setChecked(bool))); - action->setChecked(showDebug()); - popup->addSeparator(); - popup->addAction(action); - return popup; -} - -void ConfigInfoView::contextMenuEvent(QContextMenuEvent *e) -{ - Parent::contextMenuEvent(e); + contextMenu->popup(event->globalPos()); + event->accept(); } ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent) diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 461df6419f15..0b97a9817d2b 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -215,6 +215,7 @@ public slots: class ConfigInfoView : public QTextBrowser { Q_OBJECT typedef class QTextBrowser Parent; + QMenu *contextMenu; public: ConfigInfoView(QWidget* parent, const char *name = 0); bool showDebug(void) const { return _showDebug; } @@ -235,8 +236,7 @@ public slots: QString debug_info(struct symbol *sym); static QString print_filter(const QString &str); static void expr_print_help(void *data, struct symbol *sym, const char *str); - QMenu *createStandardContextMenu(const QPoint & pos); - void contextMenuEvent(QContextMenuEvent *e); + void contextMenuEvent(QContextMenuEvent *event); struct symbol *sym; struct menu *_menu; From 66c262be8f50c043bf6d2f43fa8070e5d3ba7bc0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 18 Aug 2020 01:36:31 +0900 Subject: [PATCH 145/290] kconfig: qconf: remove unused colNr This is not used at all. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 0b97a9817d2b..f97376a8123f 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -30,7 +30,7 @@ class ConfigSettings : public QSettings { }; enum colIdx { - promptColIdx, nameColIdx, noColIdx, modColIdx, yesColIdx, dataColIdx, colNr + promptColIdx, nameColIdx, noColIdx, modColIdx, yesColIdx, dataColIdx }; enum listMode { singleMode, menuMode, symbolMode, fullMode, listMode From 45bc6098a3e279d8e391d22428396687562797e2 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 7 Jul 2020 12:43:24 -0700 Subject: [PATCH 146/290] EDAC/{i7core,sb,pnd2,skx}: Fix error event severity IA32_MCG_STATUS.RIPV indicates whether the return RIP value pushed onto the stack as part of machine check delivery is valid or not. Various drivers copied a code fragment that uses the RIPV bit to determine the severity of the error as either HW_EVENT_ERR_UNCORRECTED or HW_EVENT_ERR_FATAL, but this check is reversed (marking errors where RIPV is set as "FATAL"). Reverse the tests so that the error is marked fatal when RIPV is not set. Reported-by: Gabriele Paoloni Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20200707194324.14884-1-tony.luck@intel.com --- drivers/edac/i7core_edac.c | 4 ++-- drivers/edac/pnd2_edac.c | 2 +- drivers/edac/sb_edac.c | 4 ++-- drivers/edac/skx_common.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 5860ca41185c..2acd9f9284a2 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1710,9 +1710,9 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, if (uncorrected_error) { core_err_cnt = 1; if (ripv) - tp_event = HW_EVENT_ERR_FATAL; - else tp_event = HW_EVENT_ERR_UNCORRECTED; + else + tp_event = HW_EVENT_ERR_FATAL; } else { tp_event = HW_EVENT_ERR_CORRECTED; } diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index fd363746f5b0..b8fc4b84fd86 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1155,7 +1155,7 @@ static void pnd2_mce_output_error(struct mem_ctl_info *mci, const struct mce *m, u32 optypenum = GET_BITFIELD(m->status, 4, 6); int rc; - tp_event = uc_err ? (ripv ? HW_EVENT_ERR_FATAL : HW_EVENT_ERR_UNCORRECTED) : + tp_event = uc_err ? (ripv ? HW_EVENT_ERR_UNCORRECTED : HW_EVENT_ERR_FATAL) : HW_EVENT_ERR_CORRECTED; /* diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index d414698ca324..c5ab634cb6a4 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2982,9 +2982,9 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, if (uncorrected_error) { core_err_cnt = 1; if (ripv) { - tp_event = HW_EVENT_ERR_FATAL; - } else { tp_event = HW_EVENT_ERR_UNCORRECTED; + } else { + tp_event = HW_EVENT_ERR_FATAL; } } else { tp_event = HW_EVENT_ERR_CORRECTED; diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 6d8d6dc626bf..2b4ce8e5ac2f 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -493,9 +493,9 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, if (uncorrected_error) { core_err_cnt = 1; if (ripv) { - tp_event = HW_EVENT_ERR_FATAL; - } else { tp_event = HW_EVENT_ERR_UNCORRECTED; + } else { + tp_event = HW_EVENT_ERR_FATAL; } } else { tp_event = HW_EVENT_ERR_CORRECTED; From d963c524a4b3a7dbb2be0cb6813dcda1c16c9171 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 5 Aug 2020 15:11:11 +0100 Subject: [PATCH 147/290] RDMA/hns: Fix spelling mistake "epmty" -> "empty" There is a spelling mistake in a dev_dbg message. Fix it. Link: https://lore.kernel.org/r/20200805141111.22804-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 07b4c85d341d..aeb3a6fa7d47 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -535,7 +535,7 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val); dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep); - dev_dbg(dev, "ext SDB threshold: epmty: 0x%x, ful: 0x%x\n", + dev_dbg(dev, "ext SDB threshold: empty: 0x%x, ful: 0x%x\n", ext_sdb_alept, ext_sdb_alful); } From dfd022a9ea7ef6ffab95cfaa47aae4475a55e120 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 5 Aug 2020 15:14:59 +0100 Subject: [PATCH 148/290] RDMA/usnic: Fix spelling mistake "transistion" -> "transition" There is a spelling mistake in a usnic_err error message. Fix it. Link: https://lore.kernel.org/r/20200805141459.23069-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index c9abe1c01e4e..662e7fc7f628 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -120,7 +120,7 @@ static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev) IB_QPS_ERR, NULL); if (status) { - usnic_err("Failed to transistion qp grp %u from %s to %s\n", + usnic_err("Failed to transition qp grp %u from %s to %s\n", qp_grp->grp_id, usnic_ib_qp_grp_state_to_string (cur_state), From e65b85dd0cf1cc2c0453756e468ee9271822ec0b Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 18 Aug 2020 08:12:45 -0300 Subject: [PATCH 149/290] dt-bindings: Use Shawn Guo's preferred e-mail for i.MX bindings Use Shawn Guo's kernel.org address for the i.MX related bindings as per the MAINTAINERS entries. Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20200818111245.17047-1-festevam@gmail.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/clock/imx23-clock.yaml | 2 +- Documentation/devicetree/bindings/clock/imx28-clock.yaml | 2 +- Documentation/devicetree/bindings/gpio/gpio-mxs.yaml | 2 +- Documentation/devicetree/bindings/i2c/i2c-mxs.yaml | 2 +- Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml | 2 +- Documentation/devicetree/bindings/mmc/mxs-mmc.yaml | 2 +- Documentation/devicetree/bindings/pwm/mxs-pwm.yaml | 2 +- Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml | 2 +- Documentation/devicetree/bindings/thermal/imx-thermal.yaml | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/imx23-clock.yaml b/Documentation/devicetree/bindings/clock/imx23-clock.yaml index 66cb238a1040..ad21899981af 100644 --- a/Documentation/devicetree/bindings/clock/imx23-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx23-clock.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Clock bindings for Freescale i.MX23 maintainers: - - Shawn Guo + - Shawn Guo description: | The clock consumer should specify the desired clock by having the clock diff --git a/Documentation/devicetree/bindings/clock/imx28-clock.yaml b/Documentation/devicetree/bindings/clock/imx28-clock.yaml index 72328d5ca09a..f1af1108129e 100644 --- a/Documentation/devicetree/bindings/clock/imx28-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx28-clock.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Clock bindings for Freescale i.MX28 maintainers: - - Shawn Guo + - Shawn Guo description: | The clock consumer should specify the desired clock by having the clock diff --git a/Documentation/devicetree/bindings/gpio/gpio-mxs.yaml b/Documentation/devicetree/bindings/gpio/gpio-mxs.yaml index ccf5b50e798b..dfa1133f8c5e 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-mxs.yaml +++ b/Documentation/devicetree/bindings/gpio/gpio-mxs.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale MXS GPIO controller maintainers: - - Shawn Guo + - Shawn Guo - Anson Huang description: | diff --git a/Documentation/devicetree/bindings/i2c/i2c-mxs.yaml b/Documentation/devicetree/bindings/i2c/i2c-mxs.yaml index d3134ed775fa..21ae7bce038e 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mxs.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-mxs.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale MXS Inter IC (I2C) Controller maintainers: - - Shawn Guo + - Shawn Guo properties: compatible: diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml index 75dc1168d717..10b45966f1b8 100644 --- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml +++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale Enhanced Secure Digital Host Controller (eSDHC) for i.MX maintainers: - - Shawn Guo + - Shawn Guo allOf: - $ref: "mmc-controller.yaml" diff --git a/Documentation/devicetree/bindings/mmc/mxs-mmc.yaml b/Documentation/devicetree/bindings/mmc/mxs-mmc.yaml index 1cccc0478d49..bec8f8c71ff2 100644 --- a/Documentation/devicetree/bindings/mmc/mxs-mmc.yaml +++ b/Documentation/devicetree/bindings/mmc/mxs-mmc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale MXS MMC controller maintainers: - - Shawn Guo + - Shawn Guo description: | The Freescale MXS Synchronous Serial Ports (SSP) can act as a MMC controller diff --git a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml index da68f4a25dd9..8740e076061e 100644 --- a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale MXS PWM controller maintainers: - - Shawn Guo + - Shawn Guo - Anson Huang properties: diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml index 1b50cedbfb3e..50df1a40bbe3 100644 --- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml +++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale (Enhanced) Configurable Serial Peripheral Interface (CSPI/eCSPI) for i.MX maintainers: - - Shawn Guo + - Shawn Guo allOf: - $ref: "/schemas/spi/spi-controller.yaml#" diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml index aedac1669998..16b57f57d103 100644 --- a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: NXP i.MX Thermal Binding maintainers: - - Shawn Guo + - Shawn Guo - Anson Huang properties: From 3fb1a96a91120877488071a167d26d76be4be977 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 09:44:56 -0700 Subject: [PATCH 150/290] libbpf: Fix build on ppc64le architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ppc64le we get the following warning: In file included from btf_dump.c:16:0: btf_dump.c: In function ‘btf_dump_emit_struct_def’: ../include/linux/kernel.h:20:17: error: comparison of distinct pointer types lacks a cast [-Werror] (void) (&_max1 == &_max2); \ ^ btf_dump.c:882:11: note: in expansion of macro ‘max’ m_sz = max(0LL, btf__resolve_size(d->btf, m->type)); ^~~ Fix by explicitly casting to __s64, which is a return type from btf__resolve_size(). Fixes: 702eddc77a90 ("libbpf: Handle GCC built-in types for Arm NEON") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818164456.1181661-1-andriin@fb.com --- tools/lib/bpf/btf_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index fe39bd774697..57c00fa63932 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -879,7 +879,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, btf_dump_printf(d, ": %d", m_sz); off = m_off + m_sz; } else { - m_sz = max(0LL, btf__resolve_size(d->btf, m->type)); + m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type)); off = m_off + m_sz * 8; } btf_dump_printf(d, ";"); From e9a3cd48d65307b1a489300d7e3e0b28ae94dbfd Mon Sep 17 00:00:00 2001 From: brookxu Date: Fri, 7 Aug 2020 22:01:23 +0800 Subject: [PATCH 151/290] ext4: fix typos in ext4_mb_regular_allocator() comment Fix typos in ext4_mb_regular_allocator() comment Signed-off-by: Chunguang Xu Reviewed-by: Ritesh Harjani Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/d6514145-73b3-808b-ec5a-a8be27c51f9c@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8b8c84db243a..5be24244426a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2332,8 +2332,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) goto out; /* - * ac->ac2_order is set only if the fe_len is a power of 2 - * if ac2_order is set we also set criteria to 0 so that we + * ac->ac_2order is set only if the fe_len is a power of 2 + * if ac->ac_2order is set we also set criteria to 0 so that we * try exact allocation using buddy. */ i = fls(ac->ac_g_ex.fe_len); From 051e2ce8cb90592c8acdc7056ffed966310d91b4 Mon Sep 17 00:00:00 2001 From: brookxu Date: Fri, 7 Aug 2020 22:01:33 +0800 Subject: [PATCH 152/290] ext4: delete invalid comments near ext4_mb_check_limits() These comments do not seem to be related to ext4_mb_check_limits(), it may be invalid. Signed-off-by: Chunguang Xu Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/c49faf0c-d5d5-9c51-6911-9e0ff57c6bfa@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 5be24244426a..63ffd0057471 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1740,10 +1740,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, } -/* - * regular allocator, for general purposes allocation - */ - static void ext4_mb_check_limits(struct ext4_allocation_context *ac, struct ext4_buddy *e4b, int finish_group) From dddcd2f9ebdeca9fbd36526e950bbcd0f7c1765f Mon Sep 17 00:00:00 2001 From: brookxu Date: Fri, 7 Aug 2020 22:01:39 +0800 Subject: [PATCH 153/290] ext4: optimize the implementation of ext4_mb_good_group() It might be better to adjust the code in two places: 1. Determine whether grp is currupt or not should be placed first. 2. (cr<=2 && free ac_g_ex.fe_len)should may belong to the crx strategy, and it may be more appropriate to put it in the subsequent switch statement block. For cr1, cr2, the conditions in switch potentially realize the above judgment. For cr0, we should add (free ac_g_ex.fe_len) judgment, and then delete (free / fragments) >= ac->ac_g_ex.fe_len), because cr0 returns true by default. Signed-off-by: Chunguang Xu Reviewed-by: Andreas Dilger Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/e20b2d8f-1154-adb7-3831-a9e11ba842e9@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 63ffd0057471..f68730eec205 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2112,13 +2112,11 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac, BUG_ON(cr < 0 || cr >= 4); - free = grp->bb_free; - if (free == 0) - return false; - if (cr <= 2 && free < ac->ac_g_ex.fe_len) + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) return false; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) + free = grp->bb_free; + if (free == 0) return false; fragments = grp->bb_fragments; @@ -2135,8 +2133,10 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac, ((group % flex_size) == 0)) return false; - if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) || - (free / fragments) >= ac->ac_g_ex.fe_len) + if (free < ac->ac_g_ex.fe_len) + return false; + + if (ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) return true; if (grp->bb_largest_free_order < ac->ac_2order) From e0d438c72abe0cb000d67c0795b0341f9e763836 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 10 Aug 2020 02:21:58 +0000 Subject: [PATCH 154/290] mballoc: replace seq_printf with seq_puts seq_puts is a lot cheaper than seq_printf, so use that to print literal strings. Signed-off-by: Xu Wang Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/20200810022158.9167-1-vulab@iscas.ac.cn Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f68730eec205..51e2fe6094be 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2558,7 +2558,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? sg.info.bb_counters[i] : 0); - seq_printf(seq, " ]\n"); + seq_puts(seq, " ]\n"); return 0; } From 2fe34d2938181b26f86bceb7b87fbf7370bd92c4 Mon Sep 17 00:00:00 2001 From: Kyoungho Koo Date: Mon, 10 Aug 2020 17:07:05 +0900 Subject: [PATCH 155/290] ext4: remove unused parameter of ext4_generic_delete_entry function The ext4_generic_delete_entry function does not use the parameter handle, so it can be removed. Signed-off-by: Kyoungho Koo Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/20200810080701.GA14160@koo-Z370-HD3 Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 +-- fs/ext4/inline.c | 2 +- fs/ext4/namei.c | 6 ++---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3994f012a9de..71b4370a3f91 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2802,8 +2802,7 @@ extern int ext4_search_dir(struct buffer_head *bh, struct ext4_filename *fname, unsigned int offset, struct ext4_dir_entry_2 **res_dir); -extern int ext4_generic_delete_entry(handle_t *handle, - struct inode *dir, +extern int ext4_generic_delete_entry(struct inode *dir, struct ext4_dir_entry_2 *de_del, struct buffer_head *bh, void *entry_buf, diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index c3a1ad2db122..13054653a06a 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1706,7 +1706,7 @@ int ext4_delete_inline_entry(handle_t *handle, if (err) goto out; - err = ext4_generic_delete_entry(handle, dir, de_del, bh, + err = ext4_generic_delete_entry(dir, de_del, bh, inline_start, inline_size, 0); if (err) goto out; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b92571beab72..153a9fbe1dd0 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2465,8 +2465,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, * ext4_generic_delete_entry deletes a directory entry by merging it * with the previous entry */ -int ext4_generic_delete_entry(handle_t *handle, - struct inode *dir, +int ext4_generic_delete_entry(struct inode *dir, struct ext4_dir_entry_2 *de_del, struct buffer_head *bh, void *entry_buf, @@ -2527,8 +2526,7 @@ static int ext4_delete_entry(handle_t *handle, if (unlikely(err)) goto out; - err = ext4_generic_delete_entry(handle, dir, de_del, - bh, bh->b_data, + err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data, dir->i_sb->s_blocksize, csum_size); if (err) goto out; From 70d7ced2ed4dc3f7f21b4f4fec0e5be5c2a5a55c Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Mon, 10 Aug 2020 07:44:35 -0400 Subject: [PATCH 156/290] ext4: change to use fallthrough macro Change to use fallthrough macro in switch case. Signed-off-by: Shijie Luo Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/20200810114435.24182-1-luoshijie1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/hash.c | 4 ++-- fs/ext4/indirect.c | 12 ++++++------ fs/ext4/readpage.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 3e133793a5a3..2924261226e0 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -233,7 +233,7 @@ static int __ext4fs_dirhash(const char *name, int len, break; case DX_HASH_HALF_MD4_UNSIGNED: str2hashbuf = str2hashbuf_unsigned; - /* fall through */ + fallthrough; case DX_HASH_HALF_MD4: p = name; while (len > 0) { @@ -247,7 +247,7 @@ static int __ext4fs_dirhash(const char *name, int len, break; case DX_HASH_TEA_UNSIGNED: str2hashbuf = str2hashbuf_unsigned; - /* fall through */ + fallthrough; case DX_HASH_TEA: p = name; while (len > 0) { diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 402641825712..433ca8415c5a 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1180,21 +1180,21 @@ void ext4_ind_truncate(handle_t *handle, struct inode *inode) ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); i_data[EXT4_IND_BLOCK] = 0; } - /* fall through */ + fallthrough; case EXT4_IND_BLOCK: nr = i_data[EXT4_DIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); i_data[EXT4_DIND_BLOCK] = 0; } - /* fall through */ + fallthrough; case EXT4_DIND_BLOCK: nr = i_data[EXT4_TIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); i_data[EXT4_TIND_BLOCK] = 0; } - /* fall through */ + fallthrough; case EXT4_TIND_BLOCK: ; } @@ -1434,7 +1434,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); i_data[EXT4_IND_BLOCK] = 0; } - /* fall through */ + fallthrough; case EXT4_IND_BLOCK: if (++n >= n2) break; @@ -1443,7 +1443,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); i_data[EXT4_DIND_BLOCK] = 0; } - /* fall through */ + fallthrough; case EXT4_DIND_BLOCK: if (++n >= n2) break; @@ -1452,7 +1452,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); i_data[EXT4_TIND_BLOCK] = 0; } - /* fall through */ + fallthrough; case EXT4_TIND_BLOCK: ; } diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 5761e9961682..153a838e9e3c 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -140,7 +140,7 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx) return; } ctx->cur_step++; - /* fall-through */ + fallthrough; case STEP_VERITY: if (ctx->enabled_steps & (1 << STEP_VERITY)) { INIT_WORK(&ctx->work, verity_work); @@ -148,7 +148,7 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx) return; } ctx->cur_step++; - /* fall-through */ + fallthrough; default: __read_end_io(ctx->bio); } From b7333b58f358f38d90d78e00c1ee5dec82df10ad Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 14 Aug 2020 21:30:41 -0700 Subject: [PATCH 157/290] mm/memory.c: skip spurious TLB flush for retried page fault Recently we found regression when running will_it_scale/page_fault3 test on ARM64. Over 70% down for the multi processes cases and over 20% down for the multi threads cases. It turns out the regression is caused by commit 89b15332af7c ("mm: drop mmap_sem before calling balance_dirty_pages() in write fault"). The test mmaps a memory size file then write to the mapping, this would make all memory dirty and trigger dirty pages throttle, that upstream commit would release mmap_sem then retry the page fault. The retried page fault would see correct PTEs installed then just fall through to spurious TLB flush. The regression is caused by the excessive spurious TLB flush. It is fine on x86 since x86's spurious TLB flush is no-op. We could just skip the spurious TLB flush to mitigate the regression. Suggested-by: Linus Torvalds Reported-by: Xu Yu Debugged-by: Xu Yu Tested-by: Xu Yu Cc: Johannes Weiner Cc: Catalin Marinas Cc: Will Deacon Cc: Signed-off-by: Yang Shi Signed-off-by: Linus Torvalds --- mm/memory.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 3a7779d9891d..602f4283122f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4247,6 +4247,9 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) vmf->flags & FAULT_FLAG_WRITE)) { update_mmu_cache(vmf->vma, vmf->address, vmf->pte); } else { + /* Skip spurious TLB flush for retried page fault */ + if (vmf->flags & FAULT_FLAG_TRIED) + goto unlock; /* * This is needed only for protection faults but the arch code * is not yet telling us if this is a protection fault or not. From 8b61fba503904acae24aeb2bd5569b4d6544d48f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Tue, 18 Aug 2020 10:51:34 +0200 Subject: [PATCH 158/290] macvlan: validate setting of multiple remote source MAC addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remote source MAC addresses can be set on a 'source mode' macvlan interface via the IFLA_MACVLAN_MACADDR_DATA attribute. This commit tightens the validation of these MAC addresses to match the validation already performed when setting or adding a single MAC address via the IFLA_MACVLAN_MACADDR attribute. iproute2 uses IFLA_MACVLAN_MACADDR_DATA for its 'macvlan macaddr set' command, and IFLA_MACVLAN_MACADDR for its 'macvlan macaddr add' command, which demonstrates the inconsistent behaviour that this commit addresses: # ip link add link eth0 name macvlan0 type macvlan mode source # ip link set link dev macvlan0 type macvlan macaddr add 01:00:00:00:00:00 RTNETLINK answers: Cannot assign requested address # ip link set link dev macvlan0 type macvlan macaddr set 01:00:00:00:00:00 # ip -d link show macvlan0 5: macvlan0@eth0: mtu 1500 ... link/ether 2e:ac:fd:2d:69:f8 brd ff:ff:ff:ff:ff:ff promiscuity 0 macvlan mode source remotes (1) 01:00:00:00:00:00 numtxqueues 1 ... With this change, the 'set' command will (rightly) fail in the same way as the 'add' command. Signed-off-by: Alvin Šipraga Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 4942f6112e51..5da04e997989 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1269,6 +1269,9 @@ static void macvlan_port_destroy(struct net_device *dev) static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { + struct nlattr *nla, *head; + int rem, len; + if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; @@ -1316,6 +1319,20 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], return -EADDRNOTAVAIL; } + if (data[IFLA_MACVLAN_MACADDR_DATA]) { + head = nla_data(data[IFLA_MACVLAN_MACADDR_DATA]); + len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]); + + nla_for_each_attr(nla, head, len, rem) { + if (nla_type(nla) != IFLA_MACVLAN_MACADDR || + nla_len(nla) != ETH_ALEN) + return -EINVAL; + + if (!is_valid_ether_addr(nla_data(nla))) + return -EADDRNOTAVAIL; + } + } + if (data[IFLA_MACVLAN_MACADDR_COUNT]) return -EINVAL; @@ -1372,10 +1389,6 @@ static int macvlan_changelink_sources(struct macvlan_dev *vlan, u32 mode, len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]); nla_for_each_attr(nla, head, len, rem) { - if (nla_type(nla) != IFLA_MACVLAN_MACADDR || - nla_len(nla) != ETH_ALEN) - continue; - addr = nla_data(nla); ret = macvlan_hash_add_source(vlan, addr); if (ret) From db06ea341fcd1752fbdb58454507faa140e3842f Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 18 Aug 2020 13:43:30 +0100 Subject: [PATCH 159/290] sfc: really check hash is valid before using it Actually hook up the .rx_buf_hash_valid method in EF100's nic_type. Fixes: 068885434ccb ("sfc: check hash is valid before using it") Reported-by: Martin Habets Signed-off-by: Edward Cree Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100_nic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 206d70f9d95b..b8a7e9ed7913 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -739,6 +739,7 @@ const struct efx_nic_type ef100_pf_nic_type = { .rx_remove = efx_mcdi_rx_remove, .rx_write = ef100_rx_write, .rx_packet = __ef100_rx_packet, + .rx_buf_hash_valid = ef100_rx_buf_hash_valid, .fini_dmaq = efx_fini_dmaq, .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS, .filter_table_probe = ef100_filter_table_up, @@ -820,6 +821,7 @@ const struct efx_nic_type ef100_vf_nic_type = { .rx_remove = efx_mcdi_rx_remove, .rx_write = ef100_rx_write, .rx_packet = __ef100_rx_packet, + .rx_buf_hash_valid = ef100_rx_buf_hash_valid, .fini_dmaq = efx_fini_dmaq, .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS, .filter_table_probe = ef100_filter_table_up, From 9cbbc451098ec1e9942886023203b2247dec94bd Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 18 Aug 2020 13:43:57 +0100 Subject: [PATCH 160/290] sfc: take correct lock in ef100_reset() When downing and upping the ef100 filter table, we need to take a write lock on efx->filter_sem, not just a read lock, because we may kfree() the table pointers. Without this, resets cause a WARN_ON from efx_rwsem_assert_write_locked(). Fixes: a9dc3d5612ce ("sfc_ef100: RX filter table management and related gubbins") Signed-off-by: Edward Cree Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100_nic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index b8a7e9ed7913..19fe86b3b316 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -431,18 +431,18 @@ static int ef100_reset(struct efx_nic *efx, enum reset_type reset_type) /* A RESET_TYPE_ALL will cause filters to be removed, so we remove filters * and reprobe after reset to avoid removing filters twice */ - down_read(&efx->filter_sem); + down_write(&efx->filter_sem); ef100_filter_table_down(efx); - up_read(&efx->filter_sem); + up_write(&efx->filter_sem); rc = efx_mcdi_reset(efx, reset_type); if (rc) return rc; netif_device_attach(efx->net_dev); - down_read(&efx->filter_sem); + down_write(&efx->filter_sem); rc = ef100_filter_table_up(efx); - up_read(&efx->filter_sem); + up_write(&efx->filter_sem); if (rc) return rc; From 788f920a0f137baa4dbc1efdd5039c4a0a01b8d7 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 18 Aug 2020 13:44:18 +0100 Subject: [PATCH 161/290] sfc: null out channel->rps_flow_id after freeing it If an ef100_net_open() fails, ef100_net_stop() may be called without channel->rps_flow_id having been written; thus it may hold the address freed by a previous ef100_net_stop()'s call to efx_remove_filters(). This then causes a double-free when efx_remove_filters() is called again, leading to a panic. To prevent this, after freeing it, overwrite it with NULL. Fixes: a9dc3d5612ce ("sfc_ef100: RX filter table management and related gubbins") Signed-off-by: Edward Cree Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/rx_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index ef9bca92b0b7..5e29284c89c9 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -849,6 +849,7 @@ void efx_remove_filters(struct efx_nic *efx) efx_for_each_channel(channel, efx) { cancel_delayed_work_sync(&channel->filter_work); kfree(channel->rps_flow_id); + channel->rps_flow_id = NULL; } #endif down_write(&efx->filter_sem); From e6a43910d55d09dae65772ad571d4c61e459b17a Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 18 Aug 2020 13:44:50 +0100 Subject: [PATCH 162/290] sfc: don't free_irq()s if they were never requested If efx_nic_init_interrupt fails, or was never run (e.g. due to an earlier failure in ef100_net_open), freeing irqs in efx_nic_fini_interrupt is not needed and will cause error messages and stack traces. So instead, only do this if efx_nic_init_interrupt successfully completed, as indicated by the new efx->irqs_hooked flag. Fixes: 965b549f3c20 ("sfc_ef100: implement ndo_open/close and EVQ probing") Signed-off-by: Edward Cree Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/nic.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index dcb741d8bd11..062462a13847 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -846,6 +846,7 @@ struct efx_async_filter_insertion { * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds * @timer_max_ns: Interrupt timer maximum value, in nanoseconds * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues + * @irqs_hooked: Channel interrupts are hooked * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues * @irq_rx_moderation_us: IRQ moderation time for RX event queues * @msg_enable: Log message enable flags @@ -1004,6 +1005,7 @@ struct efx_nic { unsigned int timer_quantum_ns; unsigned int timer_max_ns; bool irq_rx_adaptive; + bool irqs_hooked; unsigned int irq_mod_step_us; unsigned int irq_rx_moderation_us; u32 msg_enable; diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index d994d136bb03..d1e908846f5d 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -129,6 +129,7 @@ int efx_nic_init_interrupt(struct efx_nic *efx) #endif } + efx->irqs_hooked = true; return 0; fail2: @@ -154,6 +155,8 @@ void efx_nic_fini_interrupt(struct efx_nic *efx) efx->net_dev->rx_cpu_rmap = NULL; #endif + if (!efx->irqs_hooked) + return; if (EFX_INT_MODE_USE_MSI(efx)) { /* Disable MSI/MSI-X interrupts */ efx_for_each_channel(channel, efx) @@ -163,6 +166,7 @@ void efx_nic_fini_interrupt(struct efx_nic *efx) /* Disable legacy interrupt */ free_irq(efx->legacy_irq, efx); } + efx->irqs_hooked = false; } /* Register dump */ From 335956421c86f64fd46186d76d3961f6adcff187 Mon Sep 17 00:00:00 2001 From: Ganji Aravind Date: Tue, 18 Aug 2020 21:10:57 +0530 Subject: [PATCH 163/290] cxgb4: Fix work request size calculation for loopback test Work request used for sending loopback packet needs to add the firmware work request only once. So, fix by using correct structure size. Fixes: 7235ffae3d2c ("cxgb4: add loopback ethtool self-test") Signed-off-by: Ganji Aravind Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index d2b587d1670a..7c9fe4bc235b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2553,8 +2553,8 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) pkt_len = ETH_HLEN + sizeof(CXGB4_SELFTEST_LB_STR); - flits = DIV_ROUND_UP(pkt_len + sizeof(struct cpl_tx_pkt) + - sizeof(*wr), sizeof(__be64)); + flits = DIV_ROUND_UP(pkt_len + sizeof(*cpl) + sizeof(*wr), + sizeof(__be64)); ndesc = flits_to_desc(flits); lb = &pi->ethtool_lb; From c650e04898072e4b579cbf8d9dd5b86bcdbe9b00 Mon Sep 17 00:00:00 2001 From: Ganji Aravind Date: Tue, 18 Aug 2020 21:10:58 +0530 Subject: [PATCH 164/290] cxgb4: Fix race between loopback and normal Tx path Even after Tx queues are marked stopped, there exists a small window where the current packet in the normal Tx path is still being sent out and loopback selftest ends up corrupting the same Tx ring. So, ensure selftest takes the Tx lock to synchronize access the Tx ring. Fixes: 7235ffae3d2c ("cxgb4: add loopback ethtool self-test") Signed-off-by: Ganji Aravind Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 7c9fe4bc235b..869431a1eedd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2561,11 +2561,14 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) lb->loopback = 1; q = &adap->sge.ethtxq[pi->first_qset]; + __netif_tx_lock(q->txq, smp_processor_id()); reclaim_completed_tx(adap, &q->q, -1, true); credits = txq_avail(&q->q) - ndesc; - if (unlikely(credits < 0)) + if (unlikely(credits < 0)) { + __netif_tx_unlock(q->txq); return -ENOMEM; + } wr = (void *)&q->q.desc[q->q.pidx]; memset(wr, 0, sizeof(struct tx_desc)); @@ -2598,6 +2601,7 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) init_completion(&lb->completion); txq_advance(&q->q, ndesc); cxgb4_ring_tx_db(adap, &q->q, ndesc); + __netif_tx_unlock(q->txq); /* wait for the pkt to return */ ret = wait_for_completion_timeout(&lb->completion, 10 * HZ); From 989e4da042ca4a56bbaca9223d1a93639ad11e17 Mon Sep 17 00:00:00 2001 From: Sumera Priyadarsini Date: Wed, 19 Aug 2020 00:22:41 +0530 Subject: [PATCH 165/290] net: gianfar: Add of_node_put() before goto statement Every iteration of for_each_available_child_of_node() decrements reference count of the previous node, however when control is transferred from the middle of the loop, as in the case of a return or break or goto, there is no decrement thus ultimately resulting in a memory leak. Fix a potential memory leak in gianfar.c by inserting of_node_put() before the goto statement. Issue found with Coccinelle. Signed-off-by: Sumera Priyadarsini Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index b513b8c5c3b5..41dd3d0f3452 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -750,8 +750,10 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) continue; err = gfar_parse_group(child, priv, model); - if (err) + if (err) { + of_node_put(child); goto err_grp_init; + } } } else { /* SQ_SG_MODE */ err = gfar_parse_group(np, priv, model); From 9c9b17a7d19a8e21db2e378784fff1128b46c9d3 Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Wed, 12 Aug 2020 15:57:32 +0800 Subject: [PATCH 166/290] drm/amdgpu: disable gfxoff for navy_flounder gfxoff is temporarily disabled for navy_flounder, since at present the feature has broken some basic amdgpu test. Signed-off-by: Jiansong Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 65997ffaed45..142ffe2da758 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3595,6 +3595,9 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) if (!gfx_v10_0_navi10_gfxoff_should_enable(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; + case CHIP_NAVY_FLOUNDER: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + break; default: break; } From f5261bec20f4ed1795d7873ffcda649db686f195 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Thu, 6 Aug 2020 23:41:47 +0800 Subject: [PATCH 167/290] drm/amdgpu: fix uninit-value in arcturus_log_thermal_throttling_event() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when function arcturus_get_smu_metrics_data() call failed, it will cause the variable "throttler_status" isn't initialized before use. warning: powerplay/arcturus_ppt.c:2268:24: warning: ‘throttler_status’ may be used uninitialized in this function [-Wmaybe-uninitialized] 2268 | if (throttler_status & logging_label[throttler_idx].feature_mask) { Signed-off-by: Kevin Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 6c991de8f371..fb962b9ceffb 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -2204,14 +2204,17 @@ static const struct throttling_logging_label { }; static void arcturus_log_thermal_throttling_event(struct smu_context *smu) { + int ret; int throttler_idx, throtting_events = 0, buf_idx = 0; struct amdgpu_device *adev = smu->adev; uint32_t throttler_status; char log_buf[256]; - arcturus_get_smu_metrics_data(smu, - METRICS_THROTTLER_STATUS, - &throttler_status); + ret = arcturus_get_smu_metrics_data(smu, + METRICS_THROTTLER_STATUS, + &throttler_status); + if (ret) + return; memset(log_buf, 0, sizeof(log_buf)); for (throttler_idx = 0; throttler_idx < ARRAY_SIZE(logging_label); From 1a68d96f81b8e7eb2a121fbf9abf9e5974e58832 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Thu, 13 Aug 2020 14:35:35 +0800 Subject: [PATCH 168/290] drm/amdgpu: fix NULL pointer access issue when unloading driver When unloading driver by "modprobe -r amdgpu", one NULL pointer dereference bug occurs in ras debugfs releasing. The cause is the duplicated debugfs_remove, as drm debugfs_root dir has been cleaned up already by drm_minor_unregister. BUG: kernel NULL pointer dereference, address: 00000000000000a0 PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 11 PID: 1526 Comm: modprobe Tainted: G OE 5.6.0-guchchen #1 Hardware name: System manufacturer System Product Name/TUF Z370-PLUS GAMING II, BIOS 0411 09/21/2018 RIP: 0010:down_write+0x15/0x40 Code: eb de e8 7e 17 72 ff cc cc cc cc cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 53 48 89 fb e8 92 d8 ff ff 31 c0 ba 01 00 00 00 48 0f b1 13 75 0f 65 48 8b 04 25 c0 8b 01 00 48 89 43 08 5b c3 RSP: 0018:ffffb1590386fcd0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000000000a0 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffff85b2fcc2 RDI: 00000000000000a0 RBP: ffffb1590386fd30 R08: ffffffff85b2fcc2 R09: 000000000002b3c0 R10: ffff97a330618c40 R11: 00000000000005f6 R12: ffff97a3481beb40 R13: 00000000000000a0 R14: ffff97a3481beb40 R15: 0000000000000000 FS: 00007fb11a717540(0000) GS:ffff97a376cc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000a0 CR3: 00000004066d6006 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: simple_recursive_removal+0x63/0x370 ? debugfs_remove+0x60/0x60 debugfs_remove+0x40/0x60 amdgpu_ras_fini+0x82/0x230 [amdgpu] ? __kernfs_remove.part.17+0x101/0x1f0 ? kernfs_name_hash+0x12/0x80 amdgpu_device_fini+0x1c0/0x580 [amdgpu] amdgpu_driver_unload_kms+0x3e/0x70 [amdgpu] amdgpu_pci_remove+0x36/0x60 [amdgpu] pci_device_remove+0x3b/0xb0 device_release_driver_internal+0xe5/0x1c0 driver_detach+0x46/0x90 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x29/0x90 amdgpu_exit+0x11/0x25 [amdgpu] __x64_sys_delete_module+0x13d/0x210 do_syscall_64+0x5f/0x250 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Guchun Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index bcce4c0be462..1bedb416eebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1243,7 +1243,6 @@ void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, if (!obj || !obj->ent) return; - debugfs_remove(obj->ent); obj->ent = NULL; put_obj(obj); } @@ -1257,7 +1256,6 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) amdgpu_ras_debugfs_remove(adev, &obj->head); } - debugfs_remove_recursive(con->dir); con->dir = NULL; } /* debugfs end */ From 8452fd0ce657a4313dfa784f11320971b67727fd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 18 Aug 2020 13:58:33 -0700 Subject: [PATCH 169/290] io_uring: cleanup io_import_iovec() of pre-mapped request io_rw_prep_async() goes through a dance of clearing req->io, calling the iovec import, then re-setting req->io. Provide an internal helper that does the right thing without needing state tweaked to get there. This enables further cleanups in io_read, io_write, and io_resubmit_prep(), but that's left for another time. Signed-off-by: Jens Axboe --- fs/io_uring.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 4b102d9ad846..e325895d681b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2819,22 +2819,15 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, return __io_iov_buffer_select(req, iov, needs_lock); } -static ssize_t io_import_iovec(int rw, struct io_kiocb *req, - struct iovec **iovec, struct iov_iter *iter, - bool needs_lock) +static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, + struct iovec **iovec, struct iov_iter *iter, + bool needs_lock) { void __user *buf = u64_to_user_ptr(req->rw.addr); size_t sqe_len = req->rw.len; ssize_t ret; u8 opcode; - if (req->io) { - struct io_async_rw *iorw = &req->io->rw; - - *iovec = NULL; - return iov_iter_count(&iorw->iter); - } - opcode = req->opcode; if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { *iovec = NULL; @@ -2879,6 +2872,16 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter); } +static ssize_t io_import_iovec(int rw, struct io_kiocb *req, + struct iovec **iovec, struct iov_iter *iter, + bool needs_lock) +{ + if (!req->io) + return __io_import_iovec(rw, req, iovec, iter, needs_lock); + *iovec = NULL; + return iov_iter_count(&req->io->rw.iter); +} + /* * For files that don't have ->read_iter() and ->write_iter(), handle them * by looping over ->read() or ->write() manually. @@ -3001,11 +3004,8 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw, ssize_t ret; iorw->iter.iov = iorw->fast_iov; - /* reset ->io around the iovec import, we don't want to use it */ - req->io = NULL; - ret = io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov, + ret = __io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov, &iorw->iter, !force_nonblock); - req->io = container_of(iorw, struct io_async_ctx, rw); if (unlikely(ret < 0)) return ret; From 0a668aee0ac288cf36a554eb7f270be2ca8ef476 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Tue, 28 Jul 2020 11:35:54 -0400 Subject: [PATCH 170/290] drm/amdgpu: parse ta firmware for navy_flounder Use the same case as sienna_cichlid Signed-off-by: Bhawanpreet Lakha Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index d488d250805d..e16874f30d5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -179,12 +179,11 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) } break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: err = psp_init_ta_microcode(&adev->psp, chip_name); if (err) return err; break; - case CHIP_NAVY_FLOUNDER: - break; default: BUG(); } From 34174b89bfa495bed9cddcc504fb38feca90fab7 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 11 Aug 2020 13:54:56 +0800 Subject: [PATCH 171/290] drm/amdkfd: fix the wrong sdma instance query for renoir Renoir only has one sdma instance, it will get failed once query the sdma1 registers. So use switch-case instead of static register array. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index c7fd0c47b254..1102de76d876 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -195,19 +195,32 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[2] = { - SOC15_REG_OFFSET(SDMA0, 0, - mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA1, 0, - mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL - }; - uint32_t retval = sdma_engine_reg_base[engine_id] + uint32_t sdma_engine_reg_base = 0; + uint32_t sdma_rlc_reg_offset; + + switch (engine_id) { + default: + dev_warn(adev->dev, + "Invalid sdma engine id (%d), using engine id 0\n", + engine_id); + fallthrough; + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + queue_id, sdma_rlc_reg_offset); - return retval; + return sdma_rlc_reg_offset; } static inline struct v9_mqd *get_mqd(void *mqd) From 5dea2142a86d9aafca07f1318851fb729577f03a Mon Sep 17 00:00:00 2001 From: Chris Park Date: Wed, 5 Aug 2020 13:46:40 -0400 Subject: [PATCH 172/290] drm/amd/display: Call DMUB for eDP power control [Why] If DMUB is used, LVTMA VBIOS call can be used to control eDP instead of tranditional transmitter control. Interface is agreed with VBIOS for eDP to use this new path to program LVTMA registers. [How] Create DAL interface to send DMUB command for LVTMA as currently implemented in VBIOS. Signed-off-by: Chris Park Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/bios/bios_parser2.c | 16 ++++++++++- .../drm/amd/display/dc/bios/command_table2.c | 28 +++++++++++++++++++ .../drm/amd/display/dc/bios/command_table2.h | 3 +- .../gpu/drm/amd/display/dc/dc_bios_types.h | 4 +++ .../display/dc/dce110/dce110_hw_sequencer.c | 24 ++++++++++++++++ .../amd/display/include/bios_parser_types.h | 7 +++++ 6 files changed, 80 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 078b7e344185..2d5c7daaee23 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1108,6 +1108,18 @@ static enum bp_result bios_parser_enable_disp_power_gating( action); } +static enum bp_result bios_parser_enable_lvtma_control( + struct dc_bios *dcb, + uint8_t uc_pwr_on) +{ + struct bios_parser *bp = BP_FROM_DCB(dcb); + + if (!bp->cmd_tbl.enable_lvtma_control) + return BP_RESULT_FAILURE; + + return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on); +} + static bool bios_parser_is_accelerated_mode( struct dc_bios *dcb) { @@ -2208,7 +2220,9 @@ static const struct dc_vbios_funcs vbios_funcs = { .get_board_layout_info = bios_get_board_layout_info, .pack_data_tables = bios_parser_pack_data_tables, - .get_atom_dc_golden_table = bios_get_atom_dc_golden_table + .get_atom_dc_golden_table = bios_get_atom_dc_golden_table, + + .enable_lvtma_control = bios_parser_enable_lvtma_control }; static bool bios_parser2_construct( diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index bed91572f82a..eb3ae5c3677c 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -904,6 +904,33 @@ static unsigned int get_smu_clock_info_v3_1(struct bios_parser *bp, uint8_t id) return 0; } +/****************************************************************************** + ****************************************************************************** + ** + ** LVTMA CONTROL + ** + ****************************************************************************** + *****************************************************************************/ + +static enum bp_result enable_lvtma_control( + struct bios_parser *bp, + uint8_t uc_pwr_on); + +static void init_enable_lvtma_control(struct bios_parser *bp) +{ + /* TODO add switch for table vrsion */ + bp->cmd_tbl.enable_lvtma_control = enable_lvtma_control; + +} + +static enum bp_result enable_lvtma_control( + struct bios_parser *bp, + uint8_t uc_pwr_on) +{ + enum bp_result result = BP_RESULT_FAILURE; + return result; +} + void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp) { init_dig_encoder_control(bp); @@ -919,4 +946,5 @@ void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp) init_set_dce_clock(bp); init_get_smu_clock_info(bp); + init_enable_lvtma_control(bp); } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h index 7a2af24dfe60..7bdce013cde5 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h @@ -94,7 +94,8 @@ struct cmd_tbl { struct bp_set_dce_clock_parameters *bp_params); unsigned int (*get_smu_clock_info)( struct bios_parser *bp, uint8_t id); - + enum bp_result (*enable_lvtma_control)(struct bios_parser *bp, + uint8_t uc_pwr_on); }; void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp); diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index d06d07042a12..0811f941f430 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -136,6 +136,10 @@ struct dc_vbios_funcs { enum bp_result (*get_atom_dc_golden_table)( struct dc_bios *dcb); + + enum bp_result (*enable_lvtma_control)( + struct dc_bios *bios, + uint8_t uc_pwr_on); }; struct bios_registers { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 49380ed3aeae..45c9e9027886 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -842,6 +842,17 @@ void dce110_edp_power_control( cntl.coherent = false; cntl.lanes_number = LANE_COUNT_FOUR; cntl.hpd_sel = link->link_enc->hpd_source; + + if (ctx->dc->ctx->dmub_srv && + ctx->dc->debug.dmub_command_table) { + if (cntl.action == TRANSMITTER_CONTROL_POWER_ON) + bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, + LVTMA_CONTROL_POWER_ON); + else + bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, + LVTMA_CONTROL_POWER_OFF); + } + bp_result = link_transmitter_control(ctx->dc_bios, &cntl); if (!power_up) @@ -919,8 +930,21 @@ void dce110_edp_backlight_control( /*edp 1.2*/ if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) edp_receiver_ready_T7(link); + + if (ctx->dc->ctx->dmub_srv && + ctx->dc->debug.dmub_command_table) { + if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) + ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, + LVTMA_CONTROL_LCD_BLON); + else + ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, + LVTMA_CONTROL_LCD_BLOFF); + } + link_transmitter_control(ctx->dc_bios, &cntl); + + if (enable && link->dpcd_sink_ext_caps.bits.oled) msleep(OLED_POST_T7_DELAY); diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h index c30437ae8395..21011edea337 100644 --- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h +++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h @@ -101,6 +101,13 @@ enum bp_pipe_control_action { ASIC_PIPE_INIT }; +enum bp_lvtma_control_action { + LVTMA_CONTROL_LCD_BLOFF = 2, + LVTMA_CONTROL_LCD_BLON = 3, + LVTMA_CONTROL_POWER_ON = 12, + LVTMA_CONTROL_POWER_OFF = 13 +}; + struct bp_encoder_control { enum bp_encoder_control_action action; enum engine_id engine_id; From 42f8c416521aced2daba61e42d631cc3be2c452b Mon Sep 17 00:00:00 2001 From: Chris Park Date: Wed, 5 Aug 2020 16:59:28 -0400 Subject: [PATCH 173/290] drm/amd/display: Assign correct left shift [Why] Reading for DP alt registers return incorrect values due to LE_SF definition missing. [How] Define correct LE_SF or DP alt registers. Signed-off-by: Chris Park Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h | 4 +++- .../gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h index bf0044f7417e..dcbf28dd72d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h @@ -167,7 +167,9 @@ LE_SF(DCIO_SOFT_RESET, UNIPHYB_SOFT_RESET, mask_sh),\ LE_SF(DCIO_SOFT_RESET, UNIPHYC_SOFT_RESET, mask_sh),\ LE_SF(DCIO_SOFT_RESET, UNIPHYD_SOFT_RESET, mask_sh),\ - LE_SF(DCIO_SOFT_RESET, UNIPHYE_SOFT_RESET, mask_sh) + LE_SF(DCIO_SOFT_RESET, UNIPHYE_SOFT_RESET, mask_sh),\ + LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, mask_sh),\ + LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, mask_sh) #define LINK_ENCODER_MASK_SH_LIST_DCN20(mask_sh)\ LINK_ENCODER_MASK_SH_LIST_DCN10(mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h index 8e9fd59ccde8..2fbf879cd327 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h @@ -61,7 +61,10 @@ DPCS_DCN2_MASK_SH_LIST(mask_sh),\ LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_DATA_ORDER_INVERT_18_BIT, mask_sh),\ LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL0, RDPCS_PHY_TX_VBOOST_LVL, mask_sh),\ - LE_SF(RDPCSTX0_RDPCSTX_CLOCK_CNTL, RDPCS_TX_CLK_EN, mask_sh) + LE_SF(RDPCSTX0_RDPCSTX_CLOCK_CNTL, RDPCS_TX_CLK_EN, mask_sh),\ + LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, mask_sh),\ + LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, mask_sh) + void dcn30_link_encoder_construct( struct dcn20_link_encoder *enc20, From 8e80d482608a4e6a97c75272ef8b4bcfc5d0c490 Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Wed, 5 Aug 2020 17:28:37 +0800 Subject: [PATCH 174/290] drm/amd/display: Fix DFPstate hang due to view port changed [Why] Place the cursor in the center of screen between two pipes then adjusting the viewport but cursour doesn't update cause DFPstate hang. [How] If viewport changed, update cursor as well. Cc: stable@vger.kernel.org Signed-off-by: Paul Hsieh Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 66180b4332f1..c8cfd3ba1c15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1457,8 +1457,8 @@ static void dcn20_update_dchubp_dpp( /* Any updates are handled in dc interface, just need to apply existing for plane enable */ if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || - pipe_ctx->update_flags.bits.scaler || pipe_ctx->update_flags.bits.viewport) - && pipe_ctx->stream->cursor_attributes.address.quad_part != 0) { + pipe_ctx->update_flags.bits.scaler || viewport_changed == true) && + pipe_ctx->stream->cursor_attributes.address.quad_part != 0) { dc->hwss.set_cursor_position(pipe_ctx); dc->hwss.set_cursor_attribute(pipe_ctx); From 5ec37c089e7d02265b4e2ed9529bc7497fc1cf5a Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Thu, 6 Aug 2020 13:57:08 -0400 Subject: [PATCH 175/290] drm/amd/display: fix dcn3 wide timing dsc validation Wide timing DSC requires odm. Since spreadsheet is missing this dsc validation we have to modify DML vba code ourselves. Signed-off-by: Dmytro Laktyushkin Reviewed-by: Wesley Chalmers Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index b54814f11b74..2beb284f89b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -63,6 +63,7 @@ typedef struct { #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff +#define DCN30_MAX_DSC_IMAGE_WIDTH 5184 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( @@ -3984,6 +3985,9 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; + } else if (v->DSCEnabled[k] && (v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH)) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; } else { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; From db0473f62438e075b6832fd1490fd06d03c59372 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Thu, 6 Aug 2020 15:40:01 -0400 Subject: [PATCH 176/290] drm/amd/display: Reset scrambling on Test Pattern [Why] Programming is missing the sequence where for eDP the scrambling is reset when testing for eye diagram test pattern. [How] Include the required register in the definition Signed-off-by: Chris Park Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 653a571e366d..ebe0cc5b833b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -491,6 +491,7 @@ static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { [id] = {\ LE_DCN3_REG_LIST(id), \ UNIPHY_DCN2_REG_LIST(phyid), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ } static const struct dce110_aux_registers_shift aux_shift = { From d2e59d0ff4c44d1f6f8ed884a5bea7d1bb7fd98c Mon Sep 17 00:00:00 2001 From: Krunoslav Kovac Date: Thu, 6 Aug 2020 17:54:47 -0400 Subject: [PATCH 177/290] drm/amd/display: fix pow() crashing when given base 0 [Why&How] pow(a,x) is implemented as exp(x*log(a)). log(0) will crash. So return 0^x = 0, unless x=0, convention seems to be 0^0 = 1. Cc: stable@vger.kernel.org Signed-off-by: Krunoslav Kovac Reviewed-by: Anthony Koo Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/include/fixed31_32.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index 89ef9f6860e5..16df2a485dd0 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -431,6 +431,9 @@ struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg); */ static inline struct fixed31_32 dc_fixpt_pow(struct fixed31_32 arg1, struct fixed31_32 arg2) { + if (arg1.value == 0) + return arg2.value == 0 ? dc_fixpt_one : dc_fixpt_zero; + return dc_fixpt_exp( dc_fixpt_mul( dc_fixpt_log(arg1), From eabe861881a733fc84f286f4d5a1ffaddd4f526f Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 15 Aug 2020 04:46:41 -0400 Subject: [PATCH 178/290] net: handle the return value of pskb_carve_frag_list() correctly pskb_carve_frag_list() may return -ENOMEM in pskb_carve_inside_nonlinear(). we should handle this correctly or we would get wrong sk_buff. Fixes: 6fa01ccd8830 ("skbuff: Add pskb_extract() helper function") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5c3b906aeef3..e18184ffa9c3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5987,9 +5987,13 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); - if (k == 0) { - /* split line is in frag list */ - pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask); + /* split line is in frag list */ + if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) { + /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */ + if (skb_has_frag_list(skb)) + kfree_skb_list(skb_shinfo(skb)->frag_list); + kfree(data); + return -ENOMEM; } skb_release_data(skb); From 0410d07190961ac526f05085765a8d04d926545b Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Sun, 16 Aug 2020 20:52:44 +0200 Subject: [PATCH 179/290] bonding: fix active-backup failover for current ARP slave When the ARP monitor is used for link detection, ARP replies are validated for all slaves (arp_validate=3) and fail_over_mac is set to active, two slaves of an active-backup bond may get stuck in a state where both of them are active and pass packets that they receive to the bond. This state makes IPv6 duplicate address detection fail. The state is reached thus: 1. The current active slave goes down because the ARP target is not reachable. 2. The current ARP slave is chosen and made active. 3. A new slave is enslaved. This new slave becomes the current active slave and can reach the ARP target. As a result, the current ARP slave stays active after the enslave action has finished and the log is littered with "PROBE BAD" messages: > bond0: PROBE: c_arp ens10 && cas ens11 BAD The workaround is to remove the slave with "going back" status from the bond and re-enslave it. This issue was encountered when DPDK PMD interfaces were being enslaved to an active-backup bond. I would be possible to fix the issue in bond_enslave() or bond_change_active_slave() but the ARP monitor was fixed instead to keep most of the actions changing the current ARP slave in the ARP monitor code. The current ARP slave is set as inactive and backup during the commit phase. A new state, BOND_LINK_FAIL, has been introduced for slaves in the context of the ARP monitor. This allows administrators to see how slaves are rotated for sending ARP requests and attempts are made to find a new active slave. Fixes: b2220cad583c9 ("bonding: refactor ARP active-backup monitor") Signed-off-by: Jiri Wiesner Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 415a37e44cae..c5d3032dd1a2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2948,6 +2948,9 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (bond_time_in_interval(bond, last_rx, 1)) { bond_propose_link_state(slave, BOND_LINK_UP); commit++; + } else if (slave->link == BOND_LINK_BACK) { + bond_propose_link_state(slave, BOND_LINK_FAIL); + commit++; } continue; } @@ -3056,6 +3059,19 @@ static void bond_ab_arp_commit(struct bonding *bond) continue; + case BOND_LINK_FAIL: + bond_set_slave_link_state(slave, BOND_LINK_FAIL, + BOND_SLAVE_NOTIFY_NOW); + bond_set_slave_inactive_flags(slave, + BOND_SLAVE_NOTIFY_NOW); + + /* A slave has just been enslaved and has become + * the current active slave. + */ + if (rtnl_dereference(bond->curr_active_slave)) + RCU_INIT_POINTER(bond->current_arp_slave, NULL); + continue; + default: slave_err(bond->dev, slave->dev, "impossible: link_new_state %d on slave\n", @@ -3106,8 +3122,6 @@ static bool bond_ab_arp_probe(struct bonding *bond) return should_notify_rtnl; } - bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_LATER); - bond_for_each_slave_rcu(bond, slave, iter) { if (!found && !before && bond_slave_is_up(slave)) before = slave; From 4ef1a7cb08e94da1f2f2a34ee6cefe7ae142dc98 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Aug 2020 14:30:49 +0800 Subject: [PATCH 180/290] ipv6: some fixes for ipv6_dev_find() This patch is to do 3 things for ipv6_dev_find(): As David A. noticed, - rt6_lookup() is not really needed. Different from __ip_dev_find(), ipv6_dev_find() doesn't have a compatibility problem, so remove it. As Hideaki suggested, - "valid" (non-tentative) check for the address is also needed. ipv6_chk_addr() calls ipv6_chk_addr_and_flags(), which will traverse the address hash list, but it's heavy to be called inside ipv6_dev_find(). This patch is to reuse the code of ipv6_chk_addr_and_flags() for ipv6_dev_find(). - dev parameter is passed into ipv6_dev_find(), as link-local addresses from user space has sin6_scope_id set and the dev lookup needs it. Fixes: 81f6cb31222d ("ipv6: add ipv6_dev_find()") Suggested-by: YOSHIFUJI Hideaki Reported-by: David Ahern Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/addrconf.h | 3 ++- net/ipv6/addrconf.c | 60 ++++++++++++++++-------------------------- net/tipc/udp_media.c | 8 +++--- 3 files changed, 28 insertions(+), 43 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index ba3f6c15ad2b..18f783dcd55f 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -97,7 +97,8 @@ bool ipv6_chk_custom_prefix(const struct in6_addr *addr, int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev); -struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr); +struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr, + struct net_device *dev); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8e761b8c47c6..01146b66d666 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1893,12 +1893,13 @@ EXPORT_SYMBOL(ipv6_chk_addr); * 2. does the address exist on the specific device * (skip_dev_check = false) */ -int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, bool skip_dev_check, - int strict, u32 banned_flags) +static struct net_device * +__ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, bool skip_dev_check, + int strict, u32 banned_flags) { unsigned int hash = inet6_addr_hash(net, addr); - const struct net_device *l3mdev; + struct net_device *l3mdev, *ndev; struct inet6_ifaddr *ifp; u32 ifp_flags; @@ -1909,10 +1910,11 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, dev = NULL; hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) + ndev = ifp->idev->dev; + if (!net_eq(dev_net(ndev), net)) continue; - if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev) + if (l3mdev_master_dev_rcu(ndev) != l3mdev) continue; /* Decouple optimistic from tentative for evaluation here. @@ -1923,15 +1925,23 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp_flags&banned_flags) && - (!dev || ifp->idev->dev == dev || + (!dev || ndev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock(); - return 1; + return ndev; } } rcu_read_unlock(); - return 0; + return NULL; +} + +int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, bool skip_dev_check, + int strict, u32 banned_flags) +{ + return __ipv6_chk_addr_and_flags(net, addr, dev, skip_dev_check, + strict, banned_flags) ? 1 : 0; } EXPORT_SYMBOL(ipv6_chk_addr_and_flags); @@ -1990,35 +2000,11 @@ EXPORT_SYMBOL(ipv6_chk_prefix); * * The caller should be protected by RCU, or RTNL. */ -struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr) +struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr, + struct net_device *dev) { - unsigned int hash = inet6_addr_hash(net, addr); - struct inet6_ifaddr *ifp, *result = NULL; - struct net_device *dev = NULL; - - rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (net_eq(dev_net(ifp->idev->dev), net) && - ipv6_addr_equal(&ifp->addr, addr)) { - result = ifp; - break; - } - } - - if (!result) { - struct rt6_info *rt; - - rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); - if (rt) { - dev = rt->dst.dev; - ip6_rt_put(rt); - } - } else { - dev = result->idev->dev; - } - rcu_read_unlock(); - - return dev; + return __ipv6_chk_addr_and_flags(net, addr, dev, !dev, 1, + IFA_F_TENTATIVE); } EXPORT_SYMBOL(ipv6_dev_find); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 53f0de0676b7..911d13cd2e67 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -660,6 +660,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; u8 node_id[NODE_ID_LEN] = {0,}; + struct net_device *dev; int rmcast = 0; ub = kzalloc(sizeof(*ub), GFP_ATOMIC); @@ -714,8 +715,6 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, rcu_assign_pointer(ub->bearer, b); tipc_udp_media_addr_set(&b->addr, &local); if (local.proto == htons(ETH_P_IP)) { - struct net_device *dev; - dev = __ip_dev_find(net, local.ipv4.s_addr, false); if (!dev) { err = -ENODEV; @@ -738,9 +737,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, b->mtu = b->media->mtu; #if IS_ENABLED(CONFIG_IPV6) } else if (local.proto == htons(ETH_P_IPV6)) { - struct net_device *dev; - - dev = ipv6_dev_find(net, &local.ipv6); + dev = ub->ifindex ? __dev_get_by_index(net, ub->ifindex) : NULL; + dev = ipv6_dev_find(net, &local.ipv6, dev); if (!dev) { err = -ENODEV; goto err; From 840110a4eae190dcbb9907d68216d5d1d9f25839 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 17 Aug 2020 16:34:05 +0300 Subject: [PATCH 181/290] ethtool: Fix preserving of wanted feature bits in netlink interface Currently, ethtool-netlink calculates new wanted bits as: (req_wanted & req_mask) | (old_active & ~req_mask) It completely discards the old wanted bits, so they are forgotten with the next ethtool command. Sample steps to reproduce: 1. ethtool -k eth0 tx-tcp-segmentation: on # TSO is on from the beginning 2. ethtool -K eth0 tx off tx-tcp-segmentation: off [not requested] 3. ethtool -k eth0 tx-tcp-segmentation: off [requested on] 4. ethtool -K eth0 rx off # Some change unrelated to TSO 5. ethtool -k eth0 tx-tcp-segmentation: off # "Wanted on" is forgotten This commit fixes it by changing the formula to: (req_wanted & req_mask) | (old_wanted & ~req_mask), where old_active was replaced by old_wanted to account for the wanted bits. The shortcut condition for the case where nothing was changed now compares wanted bitmasks, instead of wanted to active. Fixes: 0980bfcd6954 ("ethtool: set netdev features with FEATURES_SET request") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/features.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ethtool/features.c b/net/ethtool/features.c index 4e632dc987d8..ec196f0fddc9 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -224,7 +224,9 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) DECLARE_BITMAP(wanted_diff_mask, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(active_diff_mask, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(old_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(old_wanted, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(new_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(new_wanted, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(req_wanted, NETDEV_FEATURE_COUNT); DECLARE_BITMAP(req_mask, NETDEV_FEATURE_COUNT); struct nlattr *tb[ETHTOOL_A_FEATURES_MAX + 1]; @@ -250,6 +252,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); ethnl_features_to_bitmap(old_active, dev->features); + ethnl_features_to_bitmap(old_wanted, dev->wanted_features); ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, tb[ETHTOOL_A_FEATURES_WANTED], netdev_features_strings, info->extack); @@ -261,11 +264,11 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; } - /* set req_wanted bits not in req_mask from old_active */ + /* set req_wanted bits not in req_mask from old_wanted */ bitmap_and(req_wanted, req_wanted, req_mask, NETDEV_FEATURE_COUNT); - bitmap_andnot(new_active, old_active, req_mask, NETDEV_FEATURE_COUNT); - bitmap_or(req_wanted, new_active, req_wanted, NETDEV_FEATURE_COUNT); - if (bitmap_equal(req_wanted, old_active, NETDEV_FEATURE_COUNT)) { + bitmap_andnot(new_wanted, old_wanted, req_mask, NETDEV_FEATURE_COUNT); + bitmap_or(req_wanted, new_wanted, req_wanted, NETDEV_FEATURE_COUNT); + if (bitmap_equal(req_wanted, old_wanted, NETDEV_FEATURE_COUNT)) { ret = 0; goto out_rtnl; } From 2847bfed888fbb8bf4c8e8067fd6127538c2c700 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 17 Aug 2020 16:34:06 +0300 Subject: [PATCH 182/290] ethtool: Account for hw_features in netlink interface ethtool-netlink ignores dev->hw_features and may confuse the drivers by asking them to enable features not in the hw_features bitmask. For example: 1. ethtool -k eth0 tls-hw-tx-offload: off [fixed] 2. ethtool -K eth0 tls-hw-tx-offload on tls-hw-tx-offload: on 3. ethtool -k eth0 tls-hw-tx-offload: on [fixed] Fitler out dev->hw_features from req_wanted to fix it and to resemble the legacy ethtool behavior. Fixes: 0980bfcd6954 ("ethtool: set netdev features with FEATURES_SET request") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/features.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/features.c b/net/ethtool/features.c index ec196f0fddc9..6b288bfd7678 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -273,7 +273,8 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; } - dev->wanted_features = ethnl_bitmap_to_features(req_wanted); + dev->wanted_features &= ~dev->hw_features; + dev->wanted_features |= ethnl_bitmap_to_features(req_wanted) & dev->hw_features; __netdev_update_features(dev); ethnl_features_to_bitmap(new_active, dev->features); mod = !bitmap_equal(old_active, new_active, NETDEV_FEATURE_COUNT); From f01204ec8be7ea5e8f0230a7d4200e338d563bde Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 17 Aug 2020 16:34:07 +0300 Subject: [PATCH 183/290] ethtool: Don't omit the netlink reply if no features were changed The legacy ethtool userspace tool shows an error when no features could be changed. It's useful to have a netlink reply to be able to show this error when __netdev_update_features wasn't called, for example: 1. ethtool -k eth0 large-receive-offload: off 2. ethtool -K eth0 rx-fcs on 3. ethtool -K eth0 lro on Could not change any device features rx-lro: off [requested on] 4. ethtool -K eth0 lro on # The output should be the same, but without this patch the kernel # doesn't send the reply, and ethtool is unable to detect the error. This commit makes ethtool-netlink always return a reply when requested, and it still avoids unnecessary calls to __netdev_update_features if the wanted features haven't changed. Fixes: 0980bfcd6954 ("ethtool: set netdev features with FEATURES_SET request") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/features.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ethtool/features.c b/net/ethtool/features.c index 6b288bfd7678..495635f152ba 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -268,14 +268,11 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) bitmap_and(req_wanted, req_wanted, req_mask, NETDEV_FEATURE_COUNT); bitmap_andnot(new_wanted, old_wanted, req_mask, NETDEV_FEATURE_COUNT); bitmap_or(req_wanted, new_wanted, req_wanted, NETDEV_FEATURE_COUNT); - if (bitmap_equal(req_wanted, old_wanted, NETDEV_FEATURE_COUNT)) { - ret = 0; - goto out_rtnl; + if (!bitmap_equal(req_wanted, old_wanted, NETDEV_FEATURE_COUNT)) { + dev->wanted_features &= ~dev->hw_features; + dev->wanted_features |= ethnl_bitmap_to_features(req_wanted) & dev->hw_features; + __netdev_update_features(dev); } - - dev->wanted_features &= ~dev->hw_features; - dev->wanted_features |= ethnl_bitmap_to_features(req_wanted) & dev->hw_features; - __netdev_update_features(dev); ethnl_features_to_bitmap(new_active, dev->features); mod = !bitmap_equal(old_active, new_active, NETDEV_FEATURE_COUNT); From 17340552ce449ab55e3ccbfc2af1bcc600b4fbb5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 17 Aug 2020 23:40:42 +0100 Subject: [PATCH 184/290] net: mscc: ocelot: remove duplicate "the the" phrase in Kconfig text The Kconfig help text contains the phrase "the the" in the help text. Fix this. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/Kconfig b/drivers/net/dsa/ocelot/Kconfig index f121619d81fe..2d23ccef7d0e 100644 --- a/drivers/net/dsa/ocelot/Kconfig +++ b/drivers/net/dsa/ocelot/Kconfig @@ -9,7 +9,7 @@ config NET_DSA_MSCC_FELIX select NET_DSA_TAG_OCELOT select FSL_ENETC_MDIO help - This driver supports network switches from the the Vitesse / + This driver supports network switches from the Vitesse / Microsemi / Microchip Ocelot family of switching cores that are connected to their host CPU via Ethernet. The following switches are supported: From ad6641189c5935192a15eeb4b369dd04ebedfabb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 17 Aug 2020 23:44:25 +0100 Subject: [PATCH 185/290] net: ipv4: remove duplicate "the the" phrase in Kconfig text The Kconfig help text contains the phrase "the the" in the help text. Fix this and reformat the block of help text. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 60db5a6487cc..87983e70f03f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -661,13 +661,13 @@ config TCP_CONG_BBR BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to maximize network utilization and minimize queues. It builds an explicit - model of the the bottleneck delivery rate and path round-trip - propagation delay. It tolerates packet loss and delay unrelated to - congestion. It can operate over LAN, WAN, cellular, wifi, or cable - modem links. It can coexist with flows that use loss-based congestion - control, and can operate with shallow buffers, deep buffers, - bufferbloat, policers, or AQM schemes that do not provide a delay - signal. It requires the fq ("Fair Queue") pacing packet scheduler. + model of the bottleneck delivery rate and path round-trip propagation + delay. It tolerates packet loss and delay unrelated to congestion. It + can operate over LAN, WAN, cellular, wifi, or cable modem links. It can + coexist with flows that use loss-based congestion control, and can + operate with shallow buffers, deep buffers, bufferbloat, policers, or + AQM schemes that do not provide a delay signal. It requires the fq + ("Fair Queue") pacing packet scheduler. choice prompt "Default TCP congestion control" From e679654a704e5bd676ea6446fa7b764cbabf168a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 18 Aug 2020 15:23:09 -0700 Subject: [PATCH 186/290] bpf: Fix a rcu_sched stall issue with bpf task/task_file iterator In our production system, we observed rcu stalls when 'bpftool prog` is running. rcu: INFO: rcu_sched self-detected stall on CPU rcu: \x097-....: (20999 ticks this GP) idle=302/1/0x4000000000000000 softirq=1508852/1508852 fqs=4913 \x09(t=21031 jiffies g=2534773 q=179750) NMI backtrace for cpu 7 CPU: 7 PID: 184195 Comm: bpftool Kdump: loaded Tainted: G W 5.8.0-00004-g68bfc7f8c1b4 #6 Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A17 05/03/2019 Call Trace: dump_stack+0x57/0x70 nmi_cpu_backtrace.cold+0x14/0x53 ? lapic_can_unplug_cpu.cold+0x39/0x39 nmi_trigger_cpumask_backtrace+0xb7/0xc7 rcu_dump_cpu_stacks+0xa2/0xd0 rcu_sched_clock_irq.cold+0x1ff/0x3d9 ? tick_nohz_handler+0x100/0x100 update_process_times+0x5b/0x90 tick_sched_timer+0x5e/0xf0 __hrtimer_run_queues+0x12a/0x2a0 hrtimer_interrupt+0x10e/0x280 __sysvec_apic_timer_interrupt+0x51/0xe0 asm_call_on_stack+0xf/0x20 sysvec_apic_timer_interrupt+0x6f/0x80 asm_sysvec_apic_timer_interrupt+0x12/0x20 RIP: 0010:task_file_seq_get_next+0x71/0x220 Code: 00 00 8b 53 1c 49 8b 7d 00 89 d6 48 8b 47 20 44 8b 18 41 39 d3 76 75 48 8b 4f 20 8b 01 39 d0 76 61 41 89 d1 49 39 c1 48 19 c0 <48> 8b 49 08 21 d0 48 8d 04 c1 4c 8b 08 4d 85 c9 74 46 49 8b 41 38 RSP: 0018:ffffc90006223e10 EFLAGS: 00000297 RAX: ffffffffffffffff RBX: ffff888f0d172388 RCX: ffff888c8c07c1c0 RDX: 00000000000f017b RSI: 00000000000f017b RDI: ffff888c254702c0 RBP: ffffc90006223e68 R08: ffff888be2a1c140 R09: 00000000000f017b R10: 0000000000000002 R11: 0000000000100000 R12: ffff888f23c24118 R13: ffffc90006223e60 R14: ffffffff828509a0 R15: 00000000ffffffff task_file_seq_next+0x52/0xa0 bpf_seq_read+0xb9/0x320 vfs_read+0x9d/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x38/0x60 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f8815f4f76e Code: c0 e9 f6 fe ff ff 55 48 8d 3d 76 70 0a 00 48 89 e5 e8 36 06 02 00 66 0f 1f 44 00 00 64 8b 04 25 18 00 00 00 85 c0 75 14 0f 05 <48> 3d 00 f0 ff ff 77 52 c3 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 RSP: 002b:00007fff8f9df578 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 000000000170b9c0 RCX: 00007f8815f4f76e RDX: 0000000000001000 RSI: 00007fff8f9df5b0 RDI: 0000000000000007 RBP: 00007fff8f9e05f0 R08: 0000000000000049 R09: 0000000000000010 R10: 00007f881601fa40 R11: 0000000000000246 R12: 00007fff8f9e05a8 R13: 00007fff8f9e05a8 R14: 0000000001917f90 R15: 000000000000e22e Note that `bpftool prog` actually calls a task_file bpf iterator program to establish an association between prog/map/link/btf anon files and processes. In the case where the above rcu stall occured, we had a process having 1587 tasks and each task having roughly 81305 files. This implied 129 million bpf prog invocations. Unfortunwtely none of these files are prog/map/link/btf files so bpf iterator/prog needs to traverse all these files and not able to return to user space since there are no seq_file buffer overflow. This patch fixed the issue in bpf_seq_read() to limit the number of visited objects. If the maximum number of visited objects is reached, no more objects will be visited in the current syscall. If there is nothing written in the seq_file buffer, -EAGAIN will return to the user so user can try again. The maximum number of visited objects is set at 1 million. In our Intel Xeon D-2191 2.3GHZ 18-core server, bpf_seq_read() visiting 1 million files takes around 0.18 seconds. We did not use cond_resched() since for some iterators, e.g., netlink iterator, where rcu read_lock critical section spans between consecutive seq_ops->next(), which makes impossible to do cond_resched() in the key while loop of function bpf_seq_read(). Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Cc: Paul E. McKenney Link: https://lore.kernel.org/bpf/20200818222309.2181348-1-yhs@fb.com --- kernel/bpf/bpf_iter.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b6715964b685..8faa2ce89396 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -67,6 +67,9 @@ static void bpf_iter_done_stop(struct seq_file *seq) iter_priv->done_stop = true; } +/* maximum visited objects before bailing out */ +#define MAX_ITER_OBJECTS 1000000 + /* bpf_seq_read, a customized and simpler version for bpf iterator. * no_llseek is assumed for this file. * The following are differences from seq_read(): @@ -79,7 +82,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, { struct seq_file *seq = file->private_data; size_t n, offs, copied = 0; - int err = 0; + int err = 0, num_objs = 0; void *p; mutex_lock(&seq->lock); @@ -135,6 +138,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, while (1) { loff_t pos = seq->index; + num_objs++; offs = seq->count; p = seq->op->next(seq, p, &seq->index); if (pos == seq->index) { @@ -153,6 +157,15 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, if (seq->count >= size) break; + if (num_objs >= MAX_ITER_OBJECTS) { + if (offs == 0) { + err = -EAGAIN; + seq->op->stop(seq, p); + goto done; + } + break; + } + err = seq->op->show(seq, p); if (err > 0) { bpf_iter_dec_seq_num(seq); From e60572b8d4c39572be6857d1ec91fdf979f8775f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 18 Aug 2020 15:23:10 -0700 Subject: [PATCH 187/290] bpf: Avoid visit same object multiple times Currently when traversing all tasks, the next tid is always increased by one. This may result in visiting the same task multiple times in a pid namespace. This patch fixed the issue by seting the next tid as pid_nr_ns(pid, ns) + 1, similar to funciton next_tgid(). Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Cc: Rik van Riel Link: https://lore.kernel.org/bpf/20200818222310.2181500-1-yhs@fb.com --- kernel/bpf/task_iter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index f21b5e1e4540..99af4cea1102 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -29,8 +29,9 @@ static struct task_struct *task_seq_get_next(struct pid_namespace *ns, rcu_read_lock(); retry: - pid = idr_get_next(&ns->idr, tid); + pid = find_ge_pid(*tid, ns); if (pid) { + *tid = pid_nr_ns(pid, ns); task = get_pid_task(pid, PIDTYPE_PID); if (!task) { ++*tid; From 00fa1d83a8b50351c830521d00135e823c46e7d0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 18 Aug 2020 15:23:12 -0700 Subject: [PATCH 188/290] bpftool: Handle EAGAIN error code properly in pids collection When the error code is EAGAIN, the kernel signals the user space should retry the read() operation for bpf iterators. Let us do it. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200818222312.2181675-1-yhs@fb.com --- tools/bpf/bpftool/pids.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index e3b116325403..df7d8ec76036 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -134,6 +134,8 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) while (true) { ret = read(fd, buf, sizeof(buf)); if (ret < 0) { + if (errno == EAGAIN) + continue; err = -errno; p_err("failed to read PID iterator output: %d", err); goto out; From e17f02d0559c174cf1f6435e45134490111eaa37 Mon Sep 17 00:00:00 2001 From: Mike Pozulp Date: Tue, 18 Aug 2020 09:54:44 -0700 Subject: [PATCH 189/290] ALSA: hda/realtek: Add quirk for Samsung Galaxy Book Ion The Galaxy Book Ion uses the same ALC298 codec as other Samsung laptops which have the no headphone sound bug, like my Samsung Notebook. The Galaxy Book owner confirmed that this patch fixes the bug. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207423 Signed-off-by: Mike Pozulp Cc: Link: https://lore.kernel.org/r/20200818165446.499821-1-pozulp.kernel@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5722f0bd3b31..a1fa983d2a94 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7695,6 +7695,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), From df3ab3cb7eae63c6eb7c9aebcc196a75d59f65dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Jul 2020 10:46:43 +0100 Subject: [PATCH 190/290] drm/i915: Provide the perf pmu.module Rather than manually implement our own module reference counting for perf pmu events, finally realise that there is a module parameter to struct pmu for this very purpose. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200716094643.31410-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi (cherry picked from commit 27e897beec1c59861f15d4d3562c39ad1143620f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_pmu.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 28bc5f13ae52..056994224c6b 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -445,8 +445,6 @@ static void i915_pmu_event_destroy(struct perf_event *event) container_of(event->pmu, typeof(*i915), pmu.base); drm_WARN_ON(&i915->drm, event->parent); - - module_put(THIS_MODULE); } static int @@ -538,10 +536,8 @@ static int i915_pmu_event_init(struct perf_event *event) if (ret) return ret; - if (!event->parent) { - __module_get(THIS_MODULE); + if (!event->parent) event->destroy = i915_pmu_event_destroy; - } return 0; } @@ -1130,6 +1126,7 @@ void i915_pmu_register(struct drm_i915_private *i915) if (!pmu->base.attr_groups) goto err_attr; + pmu->base.module = THIS_MODULE; pmu->base.task_ctx_nr = perf_invalid_context; pmu->base.event_init = i915_pmu_event_init; pmu->base.add = i915_pmu_event_add; From c499f6cb5ea4708c0c3d76a3c49917f8d905f4d4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Jul 2020 16:06:00 +0100 Subject: [PATCH 191/290] drm/i915: Copy default modparams to mock i915_device Since we use the module parameters stored inside the drm_i915_device itself, we need to ensure the mock i915_device also sets up the right defaults. Fixes: 8a25c4be583d ("drm/i915/params: switch to device specific parameters") Signed-off-by: Chris Wilson Cc: Jani Nikula Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200728150600.4509-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi (cherry picked from commit 98ef067453709444a264939940f7b3a5dfdfa09e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index b9810bf156c3..f127e633f7ca 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -78,6 +78,7 @@ static void mock_device_release(struct drm_device *dev) drm_mode_config_cleanup(&i915->drm); out: + i915_params_free(&i915->params); put_device(&i915->drm.pdev->dev); i915->drm.pdev = NULL; } @@ -165,6 +166,8 @@ struct drm_i915_private *mock_gem_device(void) i915->drm.pdev = pdev; drmm_add_final_kfree(&i915->drm, i915); + i915_params_copy(&i915->params, &i915_modparams); + intel_runtime_pm_init_early(&i915->runtime_pm); /* Using the global GTT may ask questions about KMS users, so prepare */ From b7c664611789a96dd0bb32e0fd3e9bb405ba1179 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 29 Jul 2020 14:09:12 +0100 Subject: [PATCH 192/290] drm/i915/display: Check for an LPSP encoder before dereferencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid a GPF at <1>[ 20.177320] BUG: kernel NULL pointer dereference, address: 000000000000007c <1>[ 20.177322] #PF: supervisor read access in kernel mode <1>[ 20.177323] #PF: error_code(0x0000) - not-present page <6>[ 20.177324] PGD 0 P4D 0 <4>[ 20.177327] Oops: 0000 [#1] PREEMPT SMP PTI <4>[ 20.177328] CPU: 1 PID: 944 Comm: debugfs_test Not tainted 5.8.0-rc7-CI-CI_DRM_8814+ #1 <4>[ 20.177330] Hardware name: Dell Inc. XPS 13 9360/0823VW, BIOS 2.9.0 07/09/2018 <4>[ 20.177372] RIP: 0010:i915_lpsp_capability_show+0x44/0xc0 [i915] <4>[ 20.177374] Code: 0f b6 81 ca 0d 00 00 3c 0b 74 77 76 19 3c 0c 75 44 83 7e 7c 01 7e 2f 48 c7 c6 d7 b9 47 a0 e8 43 df 06 e1 31 c0 c3 3c 09 72 2b <8b> 46 7c 85 c0 75 e6 8b 82 e4 00 00 00 89 c2 83 e2 fb 83 fa 0a 74 <4>[ 20.177376] RSP: 0018:ffffc90000cebe38 EFLAGS: 00010246 <4>[ 20.177377] RAX: 0000000000000009 RBX: ffff888267fe6a58 RCX: ffff888252d10000 <4>[ 20.177378] RDX: ffff88824a9a4000 RSI: 0000000000000000 RDI: ffff888267fe6a30 <4>[ 20.177379] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001 <4>[ 20.177380] R10: 0000000000000001 R11: 0000000000000000 R12: ffffc90000cebf08 <4>[ 20.177381] R13: 00000000ffffffff R14: 0000000000000001 R15: ffff888267fe6a30 <4>[ 20.177383] FS: 00007f6f9c6b5e40(0000) GS:ffff888276480000(0000) knlGS:0000000000000000 <4>[ 20.177384] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 20.177385] CR2: 000000000000007c CR3: 0000000255f04006 CR4: 00000000003606e0 <4>[ 20.177386] Call Trace: <4>[ 20.177390] seq_read+0xcb/0x420 which is presumably from having no encoder attached at that time. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2175 Fixes: 8806211fe7b3 ("drm/i915: Add i915_lpsp_capability debugfs") Signed-off-by: Chris Wilson Cc: Animesh Manna Cc: Anshuman Gupta Cc: Uma Shankar Cc: "Ville Syrjälä" Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20200729130912.30093-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi (cherry picked from commit a22b1a9bb0d72a58d5b836653f28d97ee8fea1c4) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_debugfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 3644752cc5ec..5a5cfe25085b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -2044,9 +2044,12 @@ DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability); static int i915_lpsp_capability_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; - struct intel_encoder *encoder = - intel_attached_encoder(to_intel_connector(connector)); struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_encoder *encoder; + + encoder = intel_attached_encoder(to_intel_connector(connector)); + if (!encoder) + return -ENODEV; if (connector->status != connector_status_connected) return -ENODEV; From 5fd73c53701327e725cd4ca70decb5c9a61f3bac Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 12 Jun 2020 13:47:34 -0700 Subject: [PATCH 193/290] drm/i915: Update bw_buddy pagemask table A recent bspec update removed the LPDDR4 single channel entry from the buddy register table, but added a new four-channel entry. Workaround 1409767108 hasn't been updated with any guidance for four channel configurations, so we leave that alternate table unchanged for now. Bspec 49218 Fixes: 3fa01d642fa7 ("drm/i915/tgl: Program BW_BUDDY registers during display init") Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200612204734.3674650-1-matthew.d.roper@intel.com Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi (cherry picked from commit ecb40d0826fda213ebb58d49e7d5b4752480e130) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 0c713e83274d..4929d890bd1c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -5240,10 +5240,10 @@ struct buddy_page_mask { }; static const struct buddy_page_mask tgl_buddy_page_masks[] = { - { .num_channels = 1, .type = INTEL_DRAM_LPDDR4, .page_mask = 0xE }, { .num_channels = 1, .type = INTEL_DRAM_DDR4, .page_mask = 0xF }, { .num_channels = 2, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x1C }, { .num_channels = 2, .type = INTEL_DRAM_DDR4, .page_mask = 0x1F }, + { .num_channels = 4, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x38 }, {} }; From c67f0c28311c44597ecd8e14d152d1775c367cdf Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 2 Aug 2020 19:15:35 +0800 Subject: [PATCH 194/290] drm/i915: Fix wrong return value in intel_atomic_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case of calling check_digital_port_conflicts() failed, a negative error code -EINVAL should be returned. Fixes: bf5da83e4bd80 ("drm/i915: Move check_digital_port_conflicts() earier") Cc: Ville Syrjälä Signed-off-by: Tianjia Zhang Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200802111535.5200-1-tianjia.zhang@linux.alibaba.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 66b51b801d05ee54a0f23628cb8220189adb715e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 729ec6e0d43a..b2ec3a5141cc 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14930,7 +14930,7 @@ static int intel_atomic_check(struct drm_device *dev, if (any_ms && !check_digital_port_conflicts(state)) { drm_dbg_kms(&dev_priv->drm, "rejecting conflicting digital port configuration\n"); - ret = EINVAL; + ret = -EINVAL; goto fail; } From c43a87f537d19499e51a6f013331f196d8f0ddbe Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Wed, 25 Mar 2020 19:24:29 +0000 Subject: [PATCH 195/290] drm/i915/selftests: Avoid passing a random 0 into ilog2 igt_mm_config() calls ilog2() on the (pseudo)random 21-bit number s>>12. Once in 2 million seeds, this is zero and ilog2 summons the nasal demons. There was an attempt to handle this case with a max(), but that's too late; ms could already be something bizarre. Given that the low 12 bits of s and ms are always zero, it's a lot simpler just to divide them by 4096, then everything fits into 32 bits, and we can easily generate a random number 1 <= s <= 0x1fffff. Fixes: 14d1b9a6247c ("drm/i915: buddy allocator") Signed-off-by: George Spelvin Cc: Matthew Auld Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Reviewed-by: Matthew Auld Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200325192429.GA8865@SDF.ORG Signed-off-by: Rodrigo Vivi (cherry picked from commit 21118e8e56479ef33460fbd63a5ad0535843b666) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/i915_buddy.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c index 939a6caebb03..632b912b0bc9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_buddy.c +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -8,8 +8,6 @@ #include "../i915_selftest.h" #include "i915_random.h" -#define SZ_8G (1ULL << 33) - static void __igt_dump_block(struct i915_buddy_mm *mm, struct i915_buddy_block *block, bool buddy) @@ -281,18 +279,22 @@ static int igt_check_mm(struct i915_buddy_mm *mm) static void igt_mm_config(u64 *size, u64 *chunk_size) { I915_RND_STATE(prng); - u64 s, ms; + u32 s, ms; /* Nothing fancy, just try to get an interesting bit pattern */ prandom_seed_state(&prng, i915_selftest.random_seed); - s = i915_prandom_u64_state(&prng) & (SZ_8G - 1); - ms = BIT_ULL(12 + (prandom_u32_state(&prng) % ilog2(s >> 12))); - s = max(s & -ms, ms); + /* Let size be a random number of pages up to 8 GB (2M pages) */ + s = 1 + i915_prandom_u32_max_state((BIT(33 - 12)) - 1, &prng); + /* Let the chunk size be a random power of 2 less than size */ + ms = BIT(i915_prandom_u32_max_state(ilog2(s), &prng)); + /* Round size down to the chunk size */ + s &= -ms; - *chunk_size = ms; - *size = s; + /* Convert from pages to bytes */ + *chunk_size = (u64)ms << 12; + *size = (u64)s << 12; } static int igt_buddy_alloc_smoke(void *arg) From 4a4064ad79699ee41b74c12fa4f9f960a5bf9b2d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 21 Jul 2020 02:29:52 +0300 Subject: [PATCH 196/290] drm/i915/tgl: Make sure TC-cold is blocked before enabling TC AUX power wells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dependency between power wells is determined by the ordering of the power well list: when enabling the power wells for a domain, this happens walking the power well list forward, while disabling them happens in the reverse direction. Accordingly a power well on the list must follow any other power well it depends on. Since the TC AUX power wells depend on TC-cold being blocked, move the TC-cold off power well before all AUX power wells. Fixes: 3c02934b24e3 ("drm/i915/tc/tgl: Implement TC cold sequences") Cc: José Roberto de Souza Signed-off-by: Imre Deak Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200720232952.16228-1-imre.deak@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit b302a2e68807604af2a5015816c1d117747989b6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_power.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 4929d890bd1c..e0fcb89c736b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4146,6 +4146,12 @@ static const struct i915_power_well_desc tgl_power_wells[] = { .hsw.idx = TGL_PW_CTL_IDX_DDI_TC6, }, }, + { + .name = "TC cold off", + .domains = TGL_TC_COLD_OFF_POWER_DOMAINS, + .ops = &tgl_tc_cold_off_ops, + .id = DISP_PW_ID_NONE, + }, { .name = "AUX A", .domains = TGL_AUX_A_IO_POWER_DOMAINS, @@ -4332,12 +4338,6 @@ static const struct i915_power_well_desc tgl_power_wells[] = { .hsw.irq_pipe_mask = BIT(PIPE_D), }, }, - { - .name = "TC cold off", - .domains = TGL_TC_COLD_OFF_POWER_DOMAINS, - .ops = &tgl_tc_cold_off_ops, - .id = DISP_PW_ID_NONE, - }, }; static const struct i915_power_well_desc rkl_power_wells[] = { From b90b925fd52c75ee7531df739d850a1f7c58ef06 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Wed, 19 Aug 2020 21:02:10 +0530 Subject: [PATCH 197/290] ALSA: hda: avoid reset of sdo_limit By default 'sdo_limit' is initialized with a default value of '8' as per spec. This is overridden in cases where a different value is required. However this is getting reset when snd_hdac_bus_init_chip() is called again, which happens during runtime PM cycle. Avoid this reset by moving 'sdo_limit' setup to 'snd_hdac_bus_init()' function which would be called only once. Fixes: 67ae482a59e9 ("ALSA: hda: add member to store ratio for stripe control") Cc: Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1597851130-6765-1-git-send-email-spujar@nvidia.com Signed-off-by: Takashi Iwai --- sound/hda/hdac_bus.c | 12 ++++++++++++ sound/hda/hdac_controller.c | 11 ----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c index 09ddab5f5cae..9766f6af8743 100644 --- a/sound/hda/hdac_bus.c +++ b/sound/hda/hdac_bus.c @@ -46,6 +46,18 @@ int snd_hdac_bus_init(struct hdac_bus *bus, struct device *dev, INIT_LIST_HEAD(&bus->hlink_list); init_waitqueue_head(&bus->rirb_wq); bus->irq = -1; + + /* + * Default value of '8' is as per the HD audio specification (Rev 1.0a). + * Following relation is used to derive STRIPE control value. + * For sample rate <= 48K: + * { ((num_channels * bits_per_sample) / number of SDOs) >= 8 } + * For sample rate > 48K: + * { ((num_channels * bits_per_sample * rate/48000) / + * number of SDOs) >= 8 } + */ + bus->sdo_limit = 8; + return 0; } EXPORT_SYMBOL_GPL(snd_hdac_bus_init); diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 011b17cc1efa..b98449fd92f3 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -529,17 +529,6 @@ bool snd_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset) bus->chip_init = true; - /* - * Default value of '8' is as per the HD audio specification (Rev 1.0a). - * Following relation is used to derive STRIPE control value. - * For sample rate <= 48K: - * { ((num_channels * bits_per_sample) / number of SDOs) >= 8 } - * For sample rate > 48K: - * { ((num_channels * bits_per_sample * rate/48000) / - * number of SDOs) >= 8 } - */ - bus->sdo_limit = 8; - return true; } EXPORT_SYMBOL_GPL(snd_hdac_bus_init_chip); From 00a3fff0712cd9cc4112ecf6da0916f8503e2a86 Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Mon, 10 Aug 2020 22:21:28 -0400 Subject: [PATCH 198/290] jbd2: clean up checksum verification in do_one_pass() Remove the unnecessary chksum_err and checksum_seen variables as well as some redundant code to make the function easier to understand. [ With changes suggested by jack@ and tytso@ ] Signed-off-by: Shijie Luo Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200819122955.33526-1-luoshijie1@huawei.com Signed-off-by: Theodore Ts'o --- fs/jbd2/recovery.c | 44 +++++++++++--------------------------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 2ed278f0dced..faa97d748474 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -690,14 +690,11 @@ static int do_one_pass(journal_t *journal, * number. */ if (pass == PASS_SCAN && jbd2_has_feature_checksum(journal)) { - int chksum_err, chksum_seen; struct commit_header *cbh = (struct commit_header *)bh->b_data; unsigned found_chksum = be32_to_cpu(cbh->h_chksum[0]); - chksum_err = chksum_seen = 0; - if (info->end_transaction) { journal->j_failed_commit = info->end_transaction; @@ -705,42 +702,23 @@ static int do_one_pass(journal_t *journal, break; } - if (crc32_sum == found_chksum && - cbh->h_chksum_type == JBD2_CRC32_CHKSUM && - cbh->h_chksum_size == - JBD2_CRC32_CHKSUM_SIZE) - chksum_seen = 1; - else if (!(cbh->h_chksum_type == 0 && - cbh->h_chksum_size == 0 && - found_chksum == 0 && - !chksum_seen)) - /* - * If fs is mounted using an old kernel and then - * kernel with journal_chksum is used then we - * get a situation where the journal flag has - * checksum flag set but checksums are not - * present i.e chksum = 0, in the individual - * commit blocks. - * Hence to avoid checksum failures, in this - * situation, this extra check is added. - */ - chksum_err = 1; + /* Neither checksum match nor unused? */ + if (!((crc32_sum == found_chksum && + cbh->h_chksum_type == + JBD2_CRC32_CHKSUM && + cbh->h_chksum_size == + JBD2_CRC32_CHKSUM_SIZE) || + (cbh->h_chksum_type == 0 && + cbh->h_chksum_size == 0 && + found_chksum == 0))) + goto chksum_error; - if (chksum_err) { - info->end_transaction = next_commit_ID; - - if (!jbd2_has_feature_async_commit(journal)) { - journal->j_failed_commit = - next_commit_ID; - brelse(bh); - break; - } - } crc32_sum = ~0; } if (pass == PASS_SCAN && !jbd2_commit_block_csum_verify(journal, bh->b_data)) { + chksum_error: info->end_transaction = next_commit_ID; if (!jbd2_has_feature_async_commit(journal)) { From 7ca4fcba92edf82b05533a6e4f6b19a5fbec6f54 Mon Sep 17 00:00:00 2001 From: kyoungho koo Date: Sat, 25 Apr 2020 02:16:24 +0900 Subject: [PATCH 199/290] ext4: Fix comment typo "the the". I have found double typed comments "the the". So i modified it to one "the" Signed-off-by: kyoungho koo Link: https://lore.kernel.org/r/20200424171620.GA11943@koo-Z370-HD3 Signed-off-by: Theodore Ts'o --- fs/ext4/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 13054653a06a..75c97bca0815 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -276,7 +276,7 @@ static int ext4_create_inline_data(handle_t *handle, len = 0; } - /* Insert the the xttr entry. */ + /* Insert the xttr entry. */ i.value = value; i.value_len = len; From c55ee7d202857b000531c2acfe8ce7cba5b77b5c Mon Sep 17 00:00:00 2001 From: brookxu Date: Sat, 15 Aug 2020 08:10:44 +0800 Subject: [PATCH 200/290] ext4: add mb_debug logging when there are lost chunks Lost chunks are when some other process raced with the current thread to grab a particular block allocation. Add mb_debug log for developers who wants to see how often this is happening for a particular workload. Signed-off-by: Chunguang Xu Link: https://lore.kernel.org/r/0a165ac0-1912-aebd-8a0d-b42e7cd1aea1@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 51e2fe6094be..0993044cfe46 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2309,6 +2309,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) struct ext4_sb_info *sbi; struct super_block *sb; struct ext4_buddy e4b; + unsigned int lost; sb = ac->ac_sb; sbi = EXT4_SB(sb); @@ -2456,22 +2457,24 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) * We've been searching too long. Let's try to allocate * the best chunk we've found so far */ - ext4_mb_try_best_found(ac, &e4b); if (ac->ac_status != AC_STATUS_FOUND) { /* * Someone more lucky has already allocated it. * The only thing we can do is just take first * found block(s) - printk(KERN_DEBUG "EXT4-fs: someone won our chunk\n"); */ + lost = (unsigned int)atomic_inc_return(&sbi->s_mb_lost_chunks); + mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %u\n", + ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len, lost); + ac->ac_b_ex.fe_group = 0; ac->ac_b_ex.fe_start = 0; ac->ac_b_ex.fe_len = 0; ac->ac_status = AC_STATUS_CONTINUE; ac->ac_flags |= EXT4_MB_HINT_FIRST; cr = 3; - atomic_inc(&sbi->s_mb_lost_chunks); goto repeat; } } From 66d5e0277e225cdc5d272fc22b1aa90a9b0d21ac Mon Sep 17 00:00:00 2001 From: brookxu Date: Mon, 17 Aug 2020 15:36:06 +0800 Subject: [PATCH 201/290] ext4: reorganize if statement of ext4_mb_release_context() Reorganize the if statement of ext4_mb_release_context(), make it easier to read. Signed-off-by: Chunguang Xu Link: https://lore.kernel.org/r/5439ac6f-db79-ad68-76c1-a4dda9aa0cc3@gmail.com Reviewed-by: Ritesh Harjani Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0993044cfe46..45ac6088b4ac 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2309,7 +2309,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) struct ext4_sb_info *sbi; struct super_block *sb; struct ext4_buddy e4b; - unsigned int lost; + int lost; sb = ac->ac_sb; sbi = EXT4_SB(sb); @@ -2464,8 +2464,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) * The only thing we can do is just take first * found block(s) */ - lost = (unsigned int)atomic_inc_return(&sbi->s_mb_lost_chunks); - mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %u\n", + lost = atomic_inc_return(&sbi->s_mb_lost_chunks); + mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n", ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len, lost); @@ -4706,20 +4706,19 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) pa->pa_free -= ac->ac_b_ex.fe_len; pa->pa_len -= ac->ac_b_ex.fe_len; spin_unlock(&pa->pa_lock); - } - } - if (pa) { - /* - * We want to add the pa to the right bucket. - * Remove it from the list and while adding - * make sure the list to which we are adding - * doesn't grow big. - */ - if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { - spin_lock(pa->pa_obj_lock); - list_del_rcu(&pa->pa_inode_list); - spin_unlock(pa->pa_obj_lock); - ext4_mb_add_n_trim(ac); + + /* + * We want to add the pa to the right bucket. + * Remove it from the list and while adding + * make sure the list to which we are adding + * doesn't grow big. + */ + if (likely(pa->pa_free)) { + spin_lock(pa->pa_obj_lock); + list_del_rcu(&pa->pa_inode_list); + spin_unlock(pa->pa_obj_lock); + ext4_mb_add_n_trim(ac); + } } ext4_mb_put_pa(ac, ac->ac_sb, pa); } From 27bc446e2def38db3244a6eb4bb1d6312936610a Mon Sep 17 00:00:00 2001 From: brookxu Date: Mon, 17 Aug 2020 15:36:15 +0800 Subject: [PATCH 202/290] ext4: limit the length of per-inode prealloc list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the scenario of writing sparse files, the per-inode prealloc list may be very long, resulting in high overhead for ext4_mb_use_preallocated(). To circumvent this problem, we limit the maximum length of per-inode prealloc list to 512 and allow users to modify it. After patching, we observed that the sys ratio of cpu has dropped, and the system throughput has increased significantly. We created a process to write the sparse file, and the running time of the process on the fixed kernel was significantly reduced, as follows: Running time on unfixed kernel: [root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat real 0m2.051s user 0m0.008s sys 0m2.026s Running time on fixed kernel: [root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat real 0m0.471s user 0m0.004s sys 0m0.395s Signed-off-by: Chunguang Xu Link: https://lore.kernel.org/r/d7a98178-056b-6db5-6bce-4ead23f4a257@gmail.com Signed-off-by: Theodore Ts'o --- Documentation/admin-guide/ext4.rst | 3 ++ fs/ext4/ext4.h | 4 +- fs/ext4/extents.c | 10 ++-- fs/ext4/file.c | 2 +- fs/ext4/indirect.c | 2 +- fs/ext4/inode.c | 6 +-- fs/ext4/ioctl.c | 2 +- fs/ext4/mballoc.c | 74 ++++++++++++++++++++++++++---- fs/ext4/mballoc.h | 4 ++ fs/ext4/move_extent.c | 4 +- fs/ext4/super.c | 3 +- fs/ext4/sysfs.c | 2 + include/trace/events/ext4.h | 17 ++++--- 13 files changed, 104 insertions(+), 29 deletions(-) diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst index 7fc6a72920c9..59bcc4a92602 100644 --- a/Documentation/admin-guide/ext4.rst +++ b/Documentation/admin-guide/ext4.rst @@ -482,6 +482,9 @@ Files in /sys/fs/ext4/: multiple of this tuning parameter if the stripe size is not set in the ext4 superblock + mb_max_inode_prealloc + The maximum length of per-inode ext4_prealloc_space list. + mb_max_to_scan The maximum number of extents the multiblock allocator will search to find the best extent. diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 71b4370a3f91..523e00d7b392 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1070,6 +1070,7 @@ struct ext4_inode_info { struct timespec64 i_crtime; /* mballoc */ + atomic_t i_prealloc_active; struct list_head i_prealloc_list; spinlock_t i_prealloc_lock; @@ -1518,6 +1519,7 @@ struct ext4_sb_info { unsigned int s_mb_stats; unsigned int s_mb_order2_reqs; unsigned int s_mb_group_prealloc; + unsigned int s_mb_max_inode_prealloc; unsigned int s_max_dir_size_kb; /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; @@ -2682,7 +2684,7 @@ extern int ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); extern int ext4_mb_reserve_blocks(struct super_block *, int); -extern void ext4_discard_preallocations(struct inode *); +extern void ext4_discard_preallocations(struct inode *, unsigned int); extern int __init ext4_init_mballoc(void); extern void ext4_exit_mballoc(void); extern ext4_group_t ext4_mb_prefetch(struct super_block *sb, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0eea09aa0f26..a0481582187a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) * i_mutex. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; @@ -4266,7 +4266,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * not a good idea to call discard here directly, * but otherwise we'd need to call it every free(). */ - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; ext4_free_blocks(handle, inode, NULL, newblock, @@ -5293,7 +5293,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) } down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); ret = ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start); @@ -5307,7 +5307,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); ret = ext4_ext_shift_extents(inode, handle, punch_stop, punch_stop - punch_start, SHIFT_LEFT); @@ -5439,7 +5439,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) goto out_stop; down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); path = ext4_find_extent(inode, offset_lblk, NULL, 0); if (IS_ERR(path)) { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 7a2720517bbb..e608ce3fb535 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -147,7 +147,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) (atomic_read(&inode->i_writecount) == 1) && !EXT4_I(inode)->i_reserved_data_blocks) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); up_write(&EXT4_I(inode)->i_data_sem); } if (is_dx(inode) && filp->private_data) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 433ca8415c5a..80c9f33800be 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, * i_mutex. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0b07576af3bf..77543f988258 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -383,7 +383,7 @@ void ext4_da_update_reserve_space(struct inode *inode, */ if ((ei->i_reserved_data_blocks == 0) && !inode_is_open_for_write(inode)) - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); } static int __check_block_validity(struct inode *inode, const char *func, @@ -4055,7 +4055,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) if (stop_block > first_block) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); ret = ext4_es_remove_extent(inode, first_block, stop_block - first_block); @@ -4210,7 +4210,7 @@ int ext4_truncate(struct inode *inode) down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) err = ext4_ext_truncate(handle, inode); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 6e70a63dcca7..36eca3bc036a 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -202,7 +202,7 @@ static long swap_inode_boot_loader(struct super_block *sb, reset_inode_seed(inode); reset_inode_seed(inode_bl); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); err = ext4_mark_inode_dirty(handle, inode); if (err < 0) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 45ac6088b4ac..132c118d12e1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2878,6 +2878,7 @@ int ext4_mb_init(struct super_block *sb) sbi->s_mb_stats = MB_DEFAULT_STATS; sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; + sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC; /* * The default group preallocation is 512, which for 4k block * sizes translates to 2 megabytes. However for bigalloc file @@ -3816,6 +3817,26 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, mb_debug(sb, "preallocated %d for group %u\n", preallocated, group); } +static void ext4_mb_mark_pa_deleted(struct super_block *sb, + struct ext4_prealloc_space *pa) +{ + struct ext4_inode_info *ei; + + if (pa->pa_deleted) { + ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n", + pa->pa_type, pa->pa_pstart, pa->pa_lstart, + pa->pa_len); + return; + } + + pa->pa_deleted = 1; + + if (pa->pa_type == MB_INODE_PA) { + ei = EXT4_I(pa->pa_inode); + atomic_dec(&ei->i_prealloc_active); + } +} + static void ext4_mb_pa_callback(struct rcu_head *head) { struct ext4_prealloc_space *pa; @@ -3848,7 +3869,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, return; } - pa->pa_deleted = 1; + ext4_mb_mark_pa_deleted(sb, pa); spin_unlock(&pa->pa_lock); grp_blk = pa->pa_pstart; @@ -3972,6 +3993,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) spin_lock(pa->pa_obj_lock); list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list); spin_unlock(pa->pa_obj_lock); + atomic_inc(&ei->i_prealloc_active); } /* @@ -4182,7 +4204,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, } /* seems this one can be freed ... */ - pa->pa_deleted = 1; + ext4_mb_mark_pa_deleted(sb, pa); /* we can trust pa_free ... */ free += pa->pa_free; @@ -4245,7 +4267,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, * * FIXME!! Make sure it is valid at all the call sites */ -void ext4_discard_preallocations(struct inode *inode) +void ext4_discard_preallocations(struct inode *inode, unsigned int needed) { struct ext4_inode_info *ei = EXT4_I(inode); struct super_block *sb = inode->i_sb; @@ -4263,15 +4285,19 @@ void ext4_discard_preallocations(struct inode *inode) mb_debug(sb, "discard preallocation for inode %lu\n", inode->i_ino); - trace_ext4_discard_preallocations(inode); + trace_ext4_discard_preallocations(inode, + atomic_read(&ei->i_prealloc_active), needed); INIT_LIST_HEAD(&list); + if (needed == 0) + needed = UINT_MAX; + repeat: /* first, collect all pa's in the inode */ spin_lock(&ei->i_prealloc_lock); - while (!list_empty(&ei->i_prealloc_list)) { - pa = list_entry(ei->i_prealloc_list.next, + while (!list_empty(&ei->i_prealloc_list) && needed) { + pa = list_entry(ei->i_prealloc_list.prev, struct ext4_prealloc_space, pa_inode_list); BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock); spin_lock(&pa->pa_lock); @@ -4288,10 +4314,11 @@ void ext4_discard_preallocations(struct inode *inode) } if (pa->pa_deleted == 0) { - pa->pa_deleted = 1; + ext4_mb_mark_pa_deleted(sb, pa); spin_unlock(&pa->pa_lock); list_del_rcu(&pa->pa_inode_list); list_add(&pa->u.pa_tmp_list, &list); + needed--; continue; } @@ -4592,7 +4619,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, BUG_ON(pa->pa_type != MB_GROUP_PA); /* seems this one can be freed ... */ - pa->pa_deleted = 1; + ext4_mb_mark_pa_deleted(sb, pa); spin_unlock(&pa->pa_lock); list_del_rcu(&pa->pa_inode_list); @@ -4690,11 +4717,30 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) return ; } +/* + * if per-inode prealloc list is too long, trim some PA + */ +static void ext4_mb_trim_inode_pa(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + int count, delta; + + count = atomic_read(&ei->i_prealloc_active); + delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1; + if (count > sbi->s_mb_max_inode_prealloc + delta) { + count -= sbi->s_mb_max_inode_prealloc; + ext4_discard_preallocations(inode, count); + } +} + /* * release all resource we used in allocation */ static int ext4_mb_release_context(struct ext4_allocation_context *ac) { + struct inode *inode = ac->ac_inode; + struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_prealloc_space *pa = ac->ac_pa; if (pa) { @@ -4720,6 +4766,17 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) ext4_mb_add_n_trim(ac); } } + + if (pa->pa_type == MB_INODE_PA) { + /* + * treat per-inode prealloc list as a lru list, then try + * to trim the least recently used PA. + */ + spin_lock(pa->pa_obj_lock); + list_move(&pa->pa_inode_list, &ei->i_prealloc_list); + spin_unlock(pa->pa_obj_lock); + } + ext4_mb_put_pa(ac, ac->ac_sb, pa); } if (ac->ac_bitmap_page) @@ -4729,6 +4786,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) mutex_unlock(&ac->ac_lg->lg_mutex); ext4_mb_collect_stats(ac); + ext4_mb_trim_inode_pa(inode); return 0; } diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 6b4d17c2935d..e75b4749aa1c 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -73,6 +73,10 @@ */ #define MB_DEFAULT_GROUP_PREALLOC 512 +/* + * maximum length of inode prealloc list + */ +#define MB_DEFAULT_MAX_INODE_PREALLOC 512 struct ext4_free_data { /* this links the free block information from sb_info */ diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 1ed86fb6c302..0d601b822875 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -686,8 +686,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, out: if (*moved_len) { - ext4_discard_preallocations(orig_inode); - ext4_discard_preallocations(donor_inode); + ext4_discard_preallocations(orig_inode, 0); + ext4_discard_preallocations(donor_inode, 0); } ext4_ext_drop_refs(path); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index daa94c7f7271..13bdddc081e0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1127,6 +1127,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) inode_set_iversion(&ei->vfs_inode, 1); spin_lock_init(&ei->i_raw_lock); INIT_LIST_HEAD(&ei->i_prealloc_list); + atomic_set(&ei->i_prealloc_active, 0); spin_lock_init(&ei->i_prealloc_lock); ext4_es_init_tree(&ei->i_es_tree); rwlock_init(&ei->i_es_lock); @@ -1220,7 +1221,7 @@ void ext4_clear_inode(struct inode *inode) { invalidate_inode_buffers(inode); clear_inode(inode); - ext4_discard_preallocations(inode); + ext4_discard_preallocations(inode, 0); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); dquot_drop(inode); if (EXT4_I(inode)->jinode) { diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 7fee11cc30e7..bfabb799fa45 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -218,6 +218,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); +EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); @@ -264,6 +265,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(mb_order2_req), ATTR_LIST(mb_stream_req), ATTR_LIST(mb_group_prealloc), + ATTR_LIST(mb_max_inode_prealloc), ATTR_LIST(max_writeback_mb_bump), ATTR_LIST(extent_max_zeroout_kb), ATTR_LIST(trigger_fs_error), diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 8008d2e116b9..4c8b99ec8606 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -746,24 +746,29 @@ TRACE_EVENT(ext4_mb_release_group_pa, ); TRACE_EVENT(ext4_discard_preallocations, - TP_PROTO(struct inode *inode), + TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed), - TP_ARGS(inode), + TP_ARGS(inode, len, needed), TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( unsigned int, len ) + __field( unsigned int, needed ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; + __entry->len = len; + __entry->needed = needed; ), - TP_printk("dev %d,%d ino %lu", + TP_printk("dev %d,%d ino %lu len: %u needed %u", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino) + (unsigned long) __entry->ino, __entry->len, + __entry->needed) ); TRACE_EVENT(ext4_mb_discard_preallocations, From fc666777da9da387354b1a142a6ffc5d43bc4f7e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 19 Aug 2020 11:10:51 -0600 Subject: [PATCH 203/290] io_uring: use system_unbound_wq for ring exit work We currently use system_wq, which is unbounded in terms of number of workers. This means that if we're exiting tons of rings at the same time, then we'll briefly spawn tons of event kworkers just for a very short blocking time as the rings exit. Use system_unbound_wq instead, which has a sane cap on the concurrency level. Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e325895d681b..1f2f31d93686 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7960,7 +7960,13 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) ACCT_LOCKED); INIT_WORK(&ctx->exit_work, io_ring_exit_work); - queue_work(system_wq, &ctx->exit_work); + /* + * Use system_unbound_wq to avoid spawning tons of event kworkers + * if we're exiting a ton of rings at the same time. It just adds + * noise and overhead, there's no discernable change in runtime + * over using system_wq. + */ + queue_work(system_unbound_wq, &ctx->exit_work); } static int io_uring_release(struct inode *inode, struct file *file) From 394b19d6cb58ae292c0e1ad6b893fed8ece477ce Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 4 Aug 2020 19:48:17 -0400 Subject: [PATCH 204/290] x86/boot/compressed: Use builtin mem functions for decompressor Since commits c041b5ad8640 ("x86, boot: Create a separate string.h file to provide standard string functions") fb4cac573ef6 ("x86, boot: Move memcmp() into string.h and string.c") the decompressor stub has been using the compiler's builtin memcpy, memset and memcmp functions, _except_ where it would likely have the largest impact, in the decompression code itself. Remove the #undef's of memcpy and memset in misc.c so that the decompressor code also uses the compiler builtins. The rationale given in the comment doesn't really apply: just because some functions use the out-of-line version is no reason to not use the builtin version in the rest. Replace the comment with an explanation of why memzero and memmove are being #define'd. Drop the suggestion to #undef in boot/string.h as well: the out-of-line versions are not really optimized versions, they're generic code that's good enough for the preboot environment. The compiler will likely generate better code for constant-size memcpy/memset/memcmp if it is allowed to. Most decompressors' performance is unchanged, with the exception of LZ4 and 64-bit ZSTD. Before After ARCH LZ4 73ms 10ms 32 LZ4 120ms 10ms 64 ZSTD 90ms 74ms 64 Measurements on QEMU on 2.2GHz Broadwell Xeon, using defconfig kernels. Decompressor code size has small differences, with the largest being that 64-bit ZSTD decreases just over 2k. The largest code size increase was on 64-bit XZ, of about 400 bytes. Signed-off-by: Arvind Sankar Suggested-by: Nick Terrell Tested-by: Nick Terrell Acked-by: Kees Cook Signed-off-by: Linus Torvalds --- arch/x86/boot/compressed/misc.c | 7 ++----- arch/x86/boot/string.h | 5 +---- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 39e592d0e0b4..e478e40fbe5a 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -30,12 +30,9 @@ #define STATIC static /* - * Use normal definitions of mem*() from string.c. There are already - * included header files which expect a definition of memset() and by - * the time we define memset macro, it is too late. + * Provide definitions of memzero and memmove as some of the decompressors will + * try to define their own functions if these are not defined as macros. */ -#undef memcpy -#undef memset #define memzero(s, n) memset((s), 0, (n)) #define memmove memmove diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 995f7b7ad512..a232da487cd2 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -11,10 +11,7 @@ void *memcpy(void *dst, const void *src, size_t len); void *memset(void *dst, int c, size_t len); int memcmp(const void *s1, const void *s2, size_t len); -/* - * Access builtin version by default. If one needs to use optimized version, - * do "undef memcpy" in .c file and link against right string.c - */ +/* Access builtin version by default. */ #define memcpy(d,s,l) __builtin_memcpy(d,s,l) #define memset(d,c,l) __builtin_memset(d,c,l) #define memcmp __builtin_memcmp From 33d0f96ffd7394ffb208bb366be312d12dfd24a4 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Wed, 19 Aug 2020 10:08:16 -0400 Subject: [PATCH 205/290] lib/string.c: Use freestanding environment gcc can transform the loop in a naive implementation of memset/memcpy etc into a call to the function itself. This optimization is enabled by -ftree-loop-distribute-patterns. This has been the case for a while, but gcc-10.x enables this option at -O2 rather than -O3 as in previous versions. Add -ffreestanding, which implicitly disables this optimization with gcc. It is unclear whether clang performs such optimizations, but hopefully it will also not do so in a freestanding environment. Signed-off-by: Arvind Sankar Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56888 Signed-off-by: Linus Torvalds --- lib/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index e290fc5707ea..a4a4c6864f51 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -15,11 +15,16 @@ KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n KCOV_INSTRUMENT_fault-inject.o := n +# string.o implements standard library functions like memset/memcpy etc. +# Use -ffreestanding to ensure that the compiler does not try to "optimize" +# them into calls to themselves. +CFLAGS_string.o := -ffreestanding + # Early boot use of cmdline, don't instrument it ifdef CONFIG_AMD_MEM_ENCRYPT KASAN_SANITIZE_string.o := n -CFLAGS_string.o := -fno-stack-protector +CFLAGS_string.o += -fno-stack-protector endif # Used by KCSAN while enabled, avoid recursion. From 43647929175e2cd30a6a1b1a5d385704bbf934a3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Aug 2020 14:48:50 +0200 Subject: [PATCH 206/290] dt: writing-schema: Miscellaneous grammar fixes - Add missing verb, - Fix accidental plural. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20200819124850.20543-1-geert+renesas@glider.be --- Documentation/devicetree/writing-schema.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/writing-schema.rst b/Documentation/devicetree/writing-schema.rst index 8c74a99f95e2..16f21e182ff6 100644 --- a/Documentation/devicetree/writing-schema.rst +++ b/Documentation/devicetree/writing-schema.rst @@ -5,7 +5,7 @@ Writing DeviceTree Bindings in json-schema Devicetree bindings are written using json-schema vocabulary. Schema files are written in a JSON compatible subset of YAML. YAML is used instead of JSON as it -considered more human readable and has some advantages such as allowing +is considered more human readable and has some advantages such as allowing comments (Prefixed with '#'). Schema Contents @@ -19,7 +19,7 @@ $id A json-schema unique identifier string. The string must be a valid URI typically containing the binding's filename and path. For DT schema, it must begin with "http://devicetree.org/schemas/". The URL is used in constructing - references to other files specified in schema "$ref" properties. A $ref values + references to other files specified in schema "$ref" properties. A $ref value with a leading '/' will have the hostname prepended. A $ref value a relative path or filename only will be prepended with the hostname and path components of the current schema file's '$id' value. A URL is used even for local files, From d1ac0002dd297069bb8448c2764c9c31c4668441 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 19 Aug 2020 10:42:55 +0100 Subject: [PATCH 207/290] of: address: Work around missing device_type property in pcie nodes Recent changes to the DT PCI bus parsing made it mandatory for device tree nodes describing a PCI controller to have the 'device_type = "pci"' property for the node to be matched. Although this follows the letter of the specification, it breaks existing device-trees that have been working fine for years. Rockchip rk3399-based systems are a prime example of such collateral damage, and have stopped discovering their PCI bus. In order to paper over it, let's add a workaround to the code matching the device type, and accept as PCI any node that is named "pcie", A warning will hopefully nudge the user into updating their DT to a fixed version if they can, but the incentive is obviously pretty small. Fixes: 2f96593ecc37 ("of_address: Add bus type match for pci ranges parser") Suggested-by: Rob Herring Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200819094255.474565-1-maz@kernel.org Signed-off-by: Rob Herring --- drivers/of/address.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 945b3d785f44..da4f7341323f 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -128,15 +128,29 @@ static unsigned int of_bus_pci_get_flags(const __be32 *addr) * PCI bus specific translator */ +static bool of_node_is_pcie(struct device_node *np) +{ + bool is_pcie = of_node_name_eq(np, "pcie"); + + if (is_pcie) + pr_warn_once("%pOF: Missing device_type\n", np); + + return is_pcie; +} + static int of_bus_pci_match(struct device_node *np) { /* * "pciex" is PCI Express * "vci" is for the /chaos bridge on 1st-gen PCI powermacs * "ht" is hypertransport + * + * If none of the device_type match, and that the node name is + * "pcie", accept the device as PCI (with a warning). */ return of_node_is_type(np, "pci") || of_node_is_type(np, "pciex") || - of_node_is_type(np, "vci") || of_node_is_type(np, "ht"); + of_node_is_type(np, "vci") || of_node_is_type(np, "ht") || + of_node_is_pcie(np); } static void of_bus_pci_count_cells(struct device_node *np, From 63d4a4c145cca2e84dc6e62d2ef5cb990c9723c2 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Wed, 19 Aug 2020 20:28:36 +0300 Subject: [PATCH 208/290] net: ena: Prevent reset after device destruction The reset work is scheduled by the timer routine whenever it detects that a device reset is required (e.g. when a keep_alive signal is missing). When releasing device resources in ena_destroy_device() the driver cancels the scheduling of the timer routine without destroying the reset work explicitly. This creates the following bug: The driver is suspended and the ena_suspend() function is called -> This function calls ena_destroy_device() to free the net device resources -> The driver waits for the timer routine to finish its execution and then cancels it, thus preventing from it to be called again. If, in its final execution, the timer routine schedules a reset, the reset routine might be called afterwards,and a redundant call to ena_restore_device() would be made. By changing the reset routine we allow it to read the device's state accurately. This is achieved by checking whether ENA_FLAG_TRIGGER_RESET flag is set before resetting the device and making both the destruction function and the flag check are under rtnl lock. The ENA_FLAG_TRIGGER_RESET is cleared at the end of the destruction routine. Also surround the flag check with 'likely' because we expect that the reset routine would be called only when ENA_FLAG_TRIGGER_RESET flag is set. The destruction of the timer and reset services in __ena_shutoff() have to stay, even though the timer routine is destroyed in ena_destroy_device(). This is to avoid a case in which the reset routine is scheduled after free_netdev() in __ena_shutoff(), which would create an access to freed memory in adapter->flags. Fixes: 8c5c7abdeb2d ("net: ena: add power management ops to the ENA driver") Signed-off-by: Shay Agroskin Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 2a6c9725e092..44aeace196f0 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3601,16 +3601,14 @@ static void ena_fw_reset_device(struct work_struct *work) { struct ena_adapter *adapter = container_of(work, struct ena_adapter, reset_task); - struct pci_dev *pdev = adapter->pdev; - if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { - dev_err(&pdev->dev, - "device reset schedule while reset bit is off\n"); - return; - } rtnl_lock(); - ena_destroy_device(adapter, false); - ena_restore_device(adapter); + + if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + ena_destroy_device(adapter, false); + ena_restore_device(adapter); + } + rtnl_unlock(); } @@ -4389,8 +4387,11 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) netdev->rx_cpu_rmap = NULL; } #endif /* CONFIG_RFS_ACCEL */ - del_timer_sync(&adapter->timer_service); + /* Make sure timer and reset routine won't be called after + * freeing device resources. + */ + del_timer_sync(&adapter->timer_service); cancel_work_sync(&adapter->reset_task); rtnl_lock(); /* lock released inside the below if-else block */ From 8b147f6f3e7de4e51113e3e9ec44aa2debc02c58 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Wed, 19 Aug 2020 20:28:37 +0300 Subject: [PATCH 209/290] net: ena: Change WARN_ON expression in ena_del_napi_in_range() The ena_del_napi_in_range() function unregisters the napi handler for rings in a given range. This function had the following WARN_ON macro: WARN_ON(ENA_IS_XDP_INDEX(adapter, i) && adapter->ena_napi[i].xdp_ring); This macro prints the call stack if the expression inside of it is true [1], but the expression inside of it is the wanted situation. The expression checks whether the ring has an XDP queue and its index corresponds to a XDP one. This patch changes the expression to !ENA_IS_XDP_INDEX(adapter, i) && adapter->ena_napi[i].xdp_ring which indicates an unwanted situation. Also, change the structure of the function. The napi handler is unregistered for all rings, and so there's no need to check whether the index is an XDP index or not. By removing this check the code becomes much more readable. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shay Agroskin Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 44aeace196f0..233db15c970d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2180,13 +2180,10 @@ static void ena_del_napi_in_range(struct ena_adapter *adapter, int i; for (i = first_index; i < first_index + count; i++) { - /* Check if napi was initialized before */ - if (!ENA_IS_XDP_INDEX(adapter, i) || - adapter->ena_napi[i].xdp_ring) - netif_napi_del(&adapter->ena_napi[i].napi); - else - WARN_ON(ENA_IS_XDP_INDEX(adapter, i) && - adapter->ena_napi[i].xdp_ring); + netif_napi_del(&adapter->ena_napi[i].napi); + + WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) && + adapter->ena_napi[i].xdp_ring); } } From ccd143e5150f24b9ba15145c7221b61dd9e41021 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Wed, 19 Aug 2020 20:28:38 +0300 Subject: [PATCH 210/290] net: ena: Make missed_tx stat incremental Most statistics in ena driver are incremented, meaning that a stat's value is a sum of all increases done to it since driver/queue initialization. This patch makes all statistics this way, effectively making missed_tx statistic incremental. Also added a comment regarding rx_drops and tx_drops to make it clearer how these counters are calculated. Fixes: 11095fdb712b ("net: ena: add statistics for missed tx packets") Signed-off-by: Shay Agroskin Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 233db15c970d..a3a8edf9a734 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3687,7 +3687,7 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, } u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.missed_tx = missed_tx; + tx_ring->tx_stats.missed_tx += missed_tx; u64_stats_update_end(&tx_ring->syncp); return rc; @@ -4556,6 +4556,9 @@ static void ena_keep_alive_wd(void *adapter_data, tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low; u64_stats_update_begin(&adapter->syncp); + /* These stats are accumulated by the device, so the counters indicate + * all drops since last reset. + */ adapter->dev_stats.rx_drops = rx_drops; adapter->dev_stats.tx_drops = tx_drops; u64_stats_update_end(&adapter->syncp); From d1fb55592909ea249af70170c7a52e637009564d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Aug 2020 21:52:38 +0200 Subject: [PATCH 211/290] netlink: fix state reallocation in policy export Evidently, when I did this previously, we didn't have more than 10 policies and didn't run into the reallocation path, because it's missing a memset() for the unused policies. Fix that. Fixes: d07dcf9aadd6 ("netlink: add infrastructure to expose policies to userspace") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/policy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netlink/policy.c b/net/netlink/policy.c index f6491853c797..2b3e26f7496f 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -51,6 +51,9 @@ static int add_policy(struct nl_policy_dump **statep, if (!state) return -ENOMEM; + memset(&state->policies[state->n_alloc], 0, + flex_array_size(state, policies, n_alloc - state->n_alloc)); + state->policies[state->n_alloc].policy = policy; state->policies[state->n_alloc].maxtype = maxtype; state->n_alloc = n_alloc; From 957ff4278e0db34f56c2bc121fdd6393e4523ef2 Mon Sep 17 00:00:00 2001 From: Min Li Date: Tue, 18 Aug 2020 10:41:22 -0400 Subject: [PATCH 212/290] ptp: ptp_clockmatrix: use i2c_master_send for i2c write The old code for i2c write would break on some controllers, which fails at handling Repeated Start Condition. So we will just use i2c_master_send to handle write in one transanction. Changes since v1: - Remove indentation change Signed-off-by: Min Li Signed-off-by: David S. Miller --- drivers/ptp/ptp_clockmatrix.c | 56 +++++++++++++++++++++++++++-------- drivers/ptp/ptp_clockmatrix.h | 2 ++ 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index 73aaae5574ed..e020faff7da5 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -142,16 +142,15 @@ static int idtcm_strverscmp(const char *ver1, const char *ver2) return result; } -static int idtcm_xfer(struct idtcm *idtcm, - u8 regaddr, - u8 *buf, - u16 count, - bool write) +static int idtcm_xfer_read(struct idtcm *idtcm, + u8 regaddr, + u8 *buf, + u16 count) { struct i2c_client *client = idtcm->client; struct i2c_msg msg[2]; int cnt; - char *fmt = "i2c_transfer failed at %d in %s for %s, at addr: %04X!\n"; + char *fmt = "i2c_transfer failed at %d in %s, at addr: %04X!\n"; msg[0].addr = client->addr; msg[0].flags = 0; @@ -159,7 +158,7 @@ static int idtcm_xfer(struct idtcm *idtcm, msg[0].buf = ®addr; msg[1].addr = client->addr; - msg[1].flags = write ? 0 : I2C_M_RD; + msg[1].flags = I2C_M_RD; msg[1].len = count; msg[1].buf = buf; @@ -170,7 +169,6 @@ static int idtcm_xfer(struct idtcm *idtcm, fmt, __LINE__, __func__, - write ? "write" : "read", regaddr); return cnt; } else if (cnt != 2) { @@ -182,6 +180,37 @@ static int idtcm_xfer(struct idtcm *idtcm, return 0; } +static int idtcm_xfer_write(struct idtcm *idtcm, + u8 regaddr, + u8 *buf, + u16 count) +{ + struct i2c_client *client = idtcm->client; + /* we add 1 byte for device register */ + u8 msg[IDTCM_MAX_WRITE_COUNT + 1]; + int cnt; + char *fmt = "i2c_master_send failed at %d in %s, at addr: %04X!\n"; + + if (count > IDTCM_MAX_WRITE_COUNT) + return -EINVAL; + + msg[0] = regaddr; + memcpy(&msg[1], buf, count); + + cnt = i2c_master_send(client, msg, count + 1); + + if (cnt < 0) { + dev_err(&client->dev, + fmt, + __LINE__, + __func__, + regaddr); + return cnt; + } + + return 0; +} + static int idtcm_page_offset(struct idtcm *idtcm, u8 val) { u8 buf[4]; @@ -195,7 +224,7 @@ static int idtcm_page_offset(struct idtcm *idtcm, u8 val) buf[2] = 0x10; buf[3] = 0x20; - err = idtcm_xfer(idtcm, PAGE_ADDR, buf, sizeof(buf), 1); + err = idtcm_xfer_write(idtcm, PAGE_ADDR, buf, sizeof(buf)); if (err) { idtcm->page_offset = 0xff; @@ -223,11 +252,12 @@ static int _idtcm_rdwr(struct idtcm *idtcm, err = idtcm_page_offset(idtcm, hi); if (err) - goto out; + return err; - err = idtcm_xfer(idtcm, lo, buf, count, write); -out: - return err; + if (write) + return idtcm_xfer_write(idtcm, lo, buf, count); + + return idtcm_xfer_read(idtcm, lo, buf, count); } static int idtcm_read(struct idtcm *idtcm, diff --git a/drivers/ptp/ptp_clockmatrix.h b/drivers/ptp/ptp_clockmatrix.h index ffae56c5d97f..82840d72364a 100644 --- a/drivers/ptp/ptp_clockmatrix.h +++ b/drivers/ptp/ptp_clockmatrix.h @@ -55,6 +55,8 @@ #define PEROUT_ENABLE_OUTPUT_MASK (0xdeadbeef) +#define IDTCM_MAX_WRITE_COUNT (512) + /* Values of DPLL_N.DPLL_MODE.PLL_MODE */ enum pll_mode { PLL_MODE_MIN = 0, From 9553b62c1dd27df67ab2f52ec8a3bc3501887619 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 18 Aug 2020 18:14:39 +0200 Subject: [PATCH 213/290] net: atlantic: Use readx_poll_timeout() for large timeout Commit 8dcf2ad39fdb2 ("net: atlantic: add hwmon getter for MAC temperature") implemented a read callback with an udelay(10000U). This fails to compile on ARM because the delay is >1ms. I doubt that it is needed to spin for 10ms even if possible on x86. >From looking at the code, the context appears to be preemptible so using usleep() should work and avoid busy spinning. Use readx_poll_timeout() in the poll loop. Fixes: 8dcf2ad39fdb2 ("net: atlantic: add hwmon getter for MAC temperature") Cc: Mark Starovoytov Cc: Igor Russkikh Signed-off-by: Sebastian Andrzej Siewior Acked-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 16a944707ba9..8941ac4df9e3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -1631,8 +1631,8 @@ static int hw_atl_b0_get_mac_temp(struct aq_hw_s *self, u32 *temp) hw_atl_ts_reset_set(self, 0); } - err = readx_poll_timeout_atomic(hw_atl_b0_ts_ready_and_latch_high_get, - self, val, val == 1, 10000U, 500000U); + err = readx_poll_timeout(hw_atl_b0_ts_ready_and_latch_high_get, self, + val, val == 1, 10000U, 500000U); if (err) return err; From cf96d977381d4a23957bade2ddf1c420b74a26b6 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 19 Aug 2020 10:33:09 +0800 Subject: [PATCH 214/290] net: gemini: Fix missing free_netdev() in error path of gemini_ethernet_port_probe() Replace alloc_etherdev_mq with devm_alloc_etherdev_mqs. In this way, when probe fails, netdev can be freed automatically. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/cortina/gemini.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 66e67b24a887..62e271aea4a5 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2389,7 +2389,7 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) dev_info(dev, "probe %s ID %d\n", dev_name(dev), id); - netdev = alloc_etherdev_mq(sizeof(*port), TX_QUEUE_NUM); + netdev = devm_alloc_etherdev_mqs(dev, sizeof(*port), TX_QUEUE_NUM, TX_QUEUE_NUM); if (!netdev) { dev_err(dev, "Can't allocate ethernet device #%d\n", id); return -ENOMEM; @@ -2521,7 +2521,6 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) } port->netdev = NULL; - free_netdev(netdev); return ret; } @@ -2530,7 +2529,6 @@ static int gemini_ethernet_port_remove(struct platform_device *pdev) struct gemini_ethernet_port *port = platform_get_drvdata(pdev); gemini_port_remove(port); - free_netdev(port->netdev); return 0; } From e17a7c0e0aebb956719ce2a8465f649859c2da7d Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 19 Aug 2020 15:07:41 +0200 Subject: [PATCH 215/290] powerpc/powernv/pci: Fix possible crash when releasing DMA resources Fix a typo introduced during recent code cleanup, which could lead to silently not freeing resources or an oops message (on PCI hotplug or CAPI reset). Only impacts ioda2, the code path for ioda1 is correct. Fixes: 01e12629af4e ("powerpc/powernv/pci: Add explicit tracking of the DMA setup state") Signed-off-by: Frederic Barrat Reviewed-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200819130741.16769-1-fbarrat@linux.ibm.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c9c25fb0783c..023a4f987bb2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2705,7 +2705,7 @@ void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) struct iommu_table *tbl = pe->table_group.tables[0]; int64_t rc; - if (pe->dma_setup_done) + if (!pe->dma_setup_done) return; rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); From da2446b66b5e2c7f3ab63912c8d999810e35e8b3 Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Mon, 17 Aug 2020 22:38:38 +0800 Subject: [PATCH 216/290] Revert "drm/amdgpu: disable gfxoff for navy_flounder" This reverts commit 9c9b17a7d19a8e21db2e378784fff1128b46c9d3. Newly released sdma fw (51.52) provides a fix for the issue. Signed-off-by: Jiansong Chen Reviewed-by: Kenneth Feng Reviewed-by: Tao Zhou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 142ffe2da758..65997ffaed45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3595,9 +3595,6 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) if (!gfx_v10_0_navi10_gfxoff_should_enable(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; - case CHIP_NAVY_FLOUNDER: - adev->pm.pp_feature &= ~PP_GFXOFF_MASK; - break; default: break; } From 6163a985e50cb19d5bdf73f98e45b8af91a77658 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 10 Jul 2020 16:16:51 +0200 Subject: [PATCH 217/290] efi: avoid error message when booting under Xen efifb_probe() will issue an error message in case the kernel is booted as Xen dom0 from UEFI as EFI_MEMMAP won't be set in this case. Avoid that message by calling efi_mem_desc_lookup() only if EFI_MEMMAP is set. Fixes: 38ac0287b7f4 ("fbdev/efifb: Honour UEFI memory map attributes when mapping the FB") Signed-off-by: Juergen Gross Acked-by: Ard Biesheuvel Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Juergen Gross --- drivers/video/fbdev/efifb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index 65491ae74808..e57c00824965 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -453,7 +453,7 @@ static int efifb_probe(struct platform_device *dev) info->apertures->ranges[0].base = efifb_fix.smem_start; info->apertures->ranges[0].size = size_remap; - if (efi_enabled(EFI_BOOT) && + if (efi_enabled(EFI_MEMMAP) && !efi_mem_desc_lookup(efifb_fix.smem_start, &md)) { if ((efifb_fix.smem_start + efifb_fix.smem_len) > (md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT))) { From ee87e1557c42dc9c2da11c38e11b87c311569853 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 20 Aug 2020 06:30:47 +0200 Subject: [PATCH 218/290] Fix build error when CONFIG_ACPI is not set/enabled: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ../arch/x86/pci/xen.c: In function ‘pci_xen_init’: ../arch/x86/pci/xen.c:410:2: error: implicit declaration of function ‘acpi_noirq_set’; did you mean ‘acpi_irq_get’? [-Werror=implicit-function-declaration] acpi_noirq_set(); Fixes: 88e9ca161c13 ("xen/pci: Use acpi_noirq_set() helper to avoid #ifdef") Signed-off-by: Randy Dunlap Reviewed-by: Juergen Gross Cc: Andy Shevchenko Cc: Bjorn Helgaas Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xenproject.org Cc: linux-pci@vger.kernel.org Signed-off-by: Juergen Gross --- arch/x86/pci/xen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e3f1ca316068..db34fee93138 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -26,6 +26,7 @@ #include #include #include +#include #include static int xen_pcifront_enable_irq(struct pci_dev *dev) From a4501bac0e553bed117b7e1b166d49731caf7260 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Mon, 10 Aug 2020 12:36:19 +0530 Subject: [PATCH 219/290] opp: Enable resources again if they were disabled earlier dev_pm_opp_set_rate() can now be called with freq = 0 in order to either drop performance or bandwidth votes or to disable regulators on platforms which support them. In such cases, a subsequent call to dev_pm_opp_set_rate() with the same frequency ends up returning early because 'old_freq == freq' Instead make it fall through and put back the dropped performance and bandwidth votes and/or enable back the regulators. Cc: v5.3+ # v5.3+ Fixes: cd7ea582866f ("opp: Make dev_pm_opp_set_rate() handle freq = 0 to drop performance votes") Reported-by: Sajida Bhanu Reviewed-by: Sibi Sankar Reported-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Reviewed-by: Stephen Boyd Signed-off-by: Rajendra Nayak [ Viresh: Don't skip clk_set_rate() and massaged changelog ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index bdb028c7793d..9668ea04cc80 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -934,10 +934,13 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) /* Return early if nothing to do */ if (old_freq == freq) { - dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n", - __func__, freq); - ret = 0; - goto put_opp_table; + if (!opp_table->required_opp_tables && !opp_table->regulators && + !opp_table->paths) { + dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n", + __func__, freq); + ret = 0; + goto put_opp_table; + } } /* From c8502eb2d43b6b9b1dc382299a4d37031be63876 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 17 Jul 2020 15:45:26 -0400 Subject: [PATCH 220/290] efi/x86: Mark kernel rodata non-executable for mixed mode When remapping the kernel rodata section RO in the EFI pagetables, the protection flags that were used for the text section are being reused, but the rodata section should not be marked executable. Cc: Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20200717194526.3452089-1-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- arch/x86/platform/efi/efi_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 413583f904a6..6af4da1149ba 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -259,6 +259,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT; rodata = __pa(__start_rodata); pfn = rodata >> PAGE_SHIFT; + + pf = _PAGE_NX | _PAGE_ENC; if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) { pr_err("Failed to map kernel rodata 1:1\n"); return 1; From 98086df8b70c06234a8f4290c46064e44dafa0ed Mon Sep 17 00:00:00 2001 From: Li Heng Date: Mon, 20 Jul 2020 15:22:18 +0800 Subject: [PATCH 221/290] efi: add missed destroy_workqueue when efisubsys_init fails destroy_workqueue() should be called to destroy efi_rts_wq when efisubsys_init() init resources fails. Cc: Reported-by: Hulk Robot Signed-off-by: Li Heng Link: https://lore.kernel.org/r/1595229738-10087-1-git-send-email-liheng40@huawei.com Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index fdd1db025dbf..3aa07c3b5136 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -381,6 +381,7 @@ static int __init efisubsys_init(void) efi_kobj = kobject_create_and_add("efi", firmware_kobj); if (!efi_kobj) { pr_err("efi: Firmware registration failed.\n"); + destroy_workqueue(efi_rts_wq); return -ENOMEM; } @@ -424,6 +425,7 @@ static int __init efisubsys_init(void) generic_ops_unregister(); err_put: kobject_put(efi_kobj); + destroy_workqueue(efi_rts_wq); return error; } From 1fd9717d75df68e3c3509b8e7b1138ca63472f88 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Sat, 25 Jul 2020 11:59:16 -0400 Subject: [PATCH 222/290] efi/libstub: Stop parsing arguments at "--" Arguments after "--" are arguments for init, not for the kernel. Cc: Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20200725155916.1376773-1-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efi-stub-helper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 6bca70bbb43d..37ff34e7b85e 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -201,6 +201,8 @@ efi_status_t efi_parse_options(char const *cmdline) char *param, *val; str = next_arg(str, ¶m, &val); + if (!val && !strcmp(param, "--")) + break; if (!strcmp(param, "nokaslr")) { efi_nokaslr = true; From a37ca6a2af9df2972372b918f09390c9303acfbd Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Wed, 29 Jul 2020 15:33:00 -0400 Subject: [PATCH 223/290] efi/libstub: Handle NULL cmdline Treat a NULL cmdline the same as empty. Although this is unlikely to happen in practice, the x86 kernel entry does check for NULL cmdline and handles it, so do it here as well. Cc: Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20200729193300.598448-1-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efi-stub-helper.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 37ff34e7b85e..f53652a3a106 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -187,10 +187,14 @@ int efi_printk(const char *fmt, ...) */ efi_status_t efi_parse_options(char const *cmdline) { - size_t len = strlen(cmdline) + 1; + size_t len; efi_status_t status; char *str, *buf; + if (!cmdline) + return EFI_SUCCESS; + + len = strlen(cmdline) + 1; status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, len, (void **)&buf); if (status != EFI_SUCCESS) return status; From 8a8a3237a78cbc0557f0eb16a89f16d616323e99 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Thu, 13 Aug 2020 14:58:11 -0400 Subject: [PATCH 224/290] efi/libstub: Handle unterminated cmdline Make the command line parsing more robust, by handling the case it is not NUL-terminated. Use strnlen instead of strlen, and make sure that the temporary copy is NUL-terminated before parsing. Cc: Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20200813185811.554051-4-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efi-stub-helper.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index f53652a3a106..f735db55adc0 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -194,12 +194,14 @@ efi_status_t efi_parse_options(char const *cmdline) if (!cmdline) return EFI_SUCCESS; - len = strlen(cmdline) + 1; + len = strnlen(cmdline, COMMAND_LINE_SIZE - 1) + 1; status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, len, (void **)&buf); if (status != EFI_SUCCESS) return status; - str = skip_spaces(memcpy(buf, cmdline, len)); + memcpy(buf, cmdline, len - 1); + buf[len - 1] = '\0'; + str = skip_spaces(buf); while (*str) { char *param, *val; From 39ada88f9c862c1ff8929ff67e0d1199c7af73fe Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 13 Aug 2020 19:38:17 +0200 Subject: [PATCH 225/290] efi/x86: Move 32-bit code into efi_32.c Now that the old memmap code has been removed, some code that was left behind in arch/x86/platform/efi/efi.c is only used for 32-bit builds, which means it can live in efi_32.c as well. So move it over. Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/efi.h | 10 ----- arch/x86/platform/efi/efi.c | 69 ---------------------------------- arch/x86/platform/efi/efi_32.c | 44 ++++++++++++++++++---- 3 files changed, 37 insertions(+), 86 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b9c2667ac46c..bc9758ef292e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -81,11 +81,8 @@ extern unsigned long efi_fw_vendor, efi_config_table; kernel_fpu_end(); \ }) - #define arch_efi_call_virt(p, f, args...) p->f(args) -#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) - #else /* !CONFIG_X86_32 */ #define EFI_LOADER_SIGNATURE "EL64" @@ -125,9 +122,6 @@ struct efi_scratch { kernel_fpu_end(); \ }) -extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, - u32 type, u64 attribute); - #ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present @@ -143,17 +137,13 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ extern struct efi_scratch efi_scratch; -extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); extern void __init efi_print_memmap(void); -extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void __init old_map_region(efi_memory_desc_t *md); -extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_update_mappings(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f6ea8f1a9d57..d37ebe6e70d7 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include @@ -496,74 +495,6 @@ void __init efi_init(void) efi_print_memmap(); } -#if defined(CONFIG_X86_32) - -void __init efi_set_executable(efi_memory_desc_t *md, bool executable) -{ - u64 addr, npages; - - addr = md->virt_addr; - npages = md->num_pages; - - memrange_efi_to_native(&addr, &npages); - - if (executable) - set_memory_x(addr, npages); - else - set_memory_nx(addr, npages); -} - -void __init runtime_code_page_mkexec(void) -{ - efi_memory_desc_t *md; - - /* Make EFI runtime service code area executable */ - for_each_efi_memory_desc(md) { - if (md->type != EFI_RUNTIME_SERVICES_CODE) - continue; - - efi_set_executable(md, true); - } -} - -void __init efi_memory_uc(u64 addr, unsigned long size) -{ - unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; - u64 npages; - - npages = round_up(size, page_shift) / page_shift; - memrange_efi_to_native(&addr, &npages); - set_memory_uc(addr, npages); -} - -void __init old_map_region(efi_memory_desc_t *md) -{ - u64 start_pfn, end_pfn, end; - unsigned long size; - void *va; - - start_pfn = PFN_DOWN(md->phys_addr); - size = md->num_pages << PAGE_SHIFT; - end = md->phys_addr + size; - end_pfn = PFN_UP(end); - - if (pfn_range_is_mapped(start_pfn, end_pfn)) { - va = __va(md->phys_addr); - - if (!(md->attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)va, size); - } else - va = efi_ioremap(md->phys_addr, size, - md->type, md->attribute); - - md->virt_addr = (u64) (unsigned long) va; - if (!va) - pr_err("ioremap of 0x%llX failed!\n", - (unsigned long long)md->phys_addr); -} - -#endif - /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 826ead67753d..e06a199423c0 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -29,9 +29,35 @@ #include #include #include +#include #include #include +void __init efi_map_region(efi_memory_desc_t *md) +{ + u64 start_pfn, end_pfn, end; + unsigned long size; + void *va; + + start_pfn = PFN_DOWN(md->phys_addr); + size = md->num_pages << PAGE_SHIFT; + end = md->phys_addr + size; + end_pfn = PFN_UP(end); + + if (pfn_range_is_mapped(start_pfn, end_pfn)) { + va = __va(md->phys_addr); + + if (!(md->attribute & EFI_MEMORY_WB)) + set_memory_uc((unsigned long)va, md->num_pages); + } else { + va = ioremap_cache(md->phys_addr, size); + } + + md->virt_addr = (unsigned long)va; + if (!va) + pr_err("ioremap of 0x%llX failed!\n", md->phys_addr); +} + /* * To make EFI call EFI runtime service in physical addressing mode we need * prolog/epilog before/after the invocation to claim the EFI runtime service @@ -58,11 +84,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; } -void __init efi_map_region(efi_memory_desc_t *md) -{ - old_map_region(md); -} - void __init efi_map_region_fixed(efi_memory_desc_t *md) {} void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} @@ -107,6 +128,15 @@ efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, void __init efi_runtime_update_mappings(void) { - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); + if (__supported_pte_mask & _PAGE_NX) { + efi_memory_desc_t *md; + + /* Make EFI runtime service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type != EFI_RUNTIME_SERVICES_CODE) + continue; + + set_memory_x(md->virt_addr, md->num_pages); + } + } } From fb1201aececc59990b75ef59fca93ae4aa1e1444 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Aug 2020 12:00:17 +0200 Subject: [PATCH 226/290] Documentation: efi: remove description of efi=old_map The old EFI runtime region mapping logic that was kept around for some time has finally been removed entirely, along with the SGI UV1 support code that was its last remaining user. So remove any mention of the efi=old_map command line parameter from the docs. Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Signed-off-by: Ard Biesheuvel --- Documentation/admin-guide/kernel-parameters.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdc1f33fd3d1..a1068742a6df 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1233,8 +1233,7 @@ efi= [EFI] Format: { "debug", "disable_early_pci_dma", "nochunk", "noruntime", "nosoftreserve", - "novamap", "no_disable_early_pci_dma", - "old_map" } + "novamap", "no_disable_early_pci_dma" } debug: enable misc debug output. disable_early_pci_dma: disable the busmaster bit on all PCI bridges while in the EFI boot stub. @@ -1251,8 +1250,6 @@ novamap: do not call SetVirtualAddressMap(). no_disable_early_pci_dma: Leave the busmaster bit set on all PCI bridges while in the EFI boot stub - old_map [X86-64]: switch to the old ioremap-based EFI - runtime services mapping. [Needs CONFIG_X86_UV=y] efi_no_storage_paranoia [EFI; X86] Using this parameter you can use more than 50% of From 17899eaf88d689529b866371344c8f269ba79b5f Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 6 Aug 2020 08:46:32 -0400 Subject: [PATCH 227/290] powerpc/perf: Fix soft lockups due to missed interrupt accounting Performance monitor interrupt handler checks if any counter has overflown and calls record_and_restart() in core-book3s which invokes perf_event_overflow() to record the sample information. Apart from creating sample, perf_event_overflow() also does the interrupt and period checks via perf_event_account_interrupt(). Currently we record information only if the SIAR (Sampled Instruction Address Register) valid bit is set (using siar_valid() check) and hence the interrupt check. But it is possible that we do sampling for some events that are not generating valid SIAR, and hence there is no chance to disable the event if interrupts are more than max_samples_per_tick. This leads to soft lockup. Fix this by adding perf_event_account_interrupt() in the invalid SIAR code path for a sampling event. ie if SIAR is invalid, just do interrupt check and don't record the sample information. Reported-by: Alexey Kardashevskiy Signed-off-by: Athira Rajeev Tested-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1596717992-7321-1-git-send-email-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 00038650a007..93d20e1ed845 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2141,6 +2141,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); + } else if (period) { + /* Account for interrupt in case of invalid SIAR */ + if (perf_event_account_interrupt(event)) + power_pmu_stop(event, 0); } } From 4469add9d3b3cdade854997720ebfeb53a83d761 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Aug 2020 08:58:24 +0100 Subject: [PATCH 228/290] RDMA/core: Fix spelling mistake "Could't" -> "Couldn't" There is a spelling mistake in a pr_warn message. Fix it. Link: https://lore.kernel.org/r/20200810075824.46770-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ef0cd2998671..c36b4d2b61e0 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2751,7 +2751,7 @@ static int __init ib_core_init(void) ret = addr_init(); if (ret) { - pr_warn("Could't init IB address resolution\n"); + pr_warn("Couldn't init IB address resolution\n"); goto err_ibnl; } From a812f2d60a9fb7818f9c81f967180317b52545c0 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 5 Aug 2020 21:45:48 -0700 Subject: [PATCH 229/290] RDMA/bnxt_re: Do not add user qps to flushlist Driver shall add only the kernel qps to the flush list for clean up. During async error events from the HW, driver is adding qps to this list without checking if the qp is kernel qp or not. Add a check to avoid user qp addition to the flush list. Fixes: 942c9b6ca8de ("RDMA/bnxt_re: Avoid Hard lockup during error CQE processing") Fixes: c50866e2853a ("bnxt_re: fix the regression due to changes in alloc_pbl") Link: https://lore.kernel.org/r/1596689148-4023-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index dad0df8a2467..17ac8b7c5710 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -821,7 +821,8 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, struct ib_event event; unsigned int flags; - if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR && + rdma_is_kernel_res(&qp->ib_qp.res)) { flags = bnxt_re_lock_cqs(qp); bnxt_qplib_add_flush_qp(&qp->qplib_qp); bnxt_re_unlock_cqs(qp, flags); From b25e8e85e75a61af1ddc88c4798387dd3132dd43 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Tue, 11 Aug 2020 13:49:31 -0400 Subject: [PATCH 230/290] RDMA/hfi1: Correct an interlock issue for TID RDMA WRITE request The following message occurs when running an AI application with TID RDMA enabled: hfi1 0000:7f:00.0: hfi1_0: [QP74] hfi1_tid_timeout 4084 hfi1 0000:7f:00.0: hfi1_0: [QP70] hfi1_tid_timeout 4084 The issue happens when TID RDMA WRITE request is followed by an IB_WR_RDMA_WRITE_WITH_IMM request, the latter could be completed first on the responder side. As a result, no ACK packet for the latter could be sent because the TID RDMA WRITE request is still being processed on the responder side. When the TID RDMA WRITE request is eventually completed, the requester will wait for the IB_WR_RDMA_WRITE_WITH_IMM request to be acknowledged. If the next request is another TID RDMA WRITE request, no TID RDMA WRITE DATA packet could be sent because the preceding IB_WR_RDMA_WRITE_WITH_IMM request is not completed yet. Consequently the IB_WR_RDMA_WRITE_WITH_IMM will be retried but it will be ignored on the responder side because the responder thinks it has already been completed. Eventually the retry will be exhausted and the qp will be put into error state on the requester side. On the responder side, the TID resource timer will eventually expire because no TID RDMA WRITE DATA packets will be received for the second TID RDMA WRITE request. There is also risk of a write-after-write memory corruption due to the issue. Fix by adding a requester side interlock to prevent any potential data corruption and TID RDMA protocol error. Fixes: a0b34f75ec20 ("IB/hfi1: Add interlock between a TID RDMA request and other requests") Link: https://lore.kernel.org/r/20200811174931.191210.84093.stgit@awfm-01.aw.intel.com Cc: # 5.4.x+ Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 9af82ff933d7..73d197e21730 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -3215,6 +3215,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: switch (prev->wr.opcode) { case IB_WR_TID_RDMA_WRITE: req = wqe_to_tid_req(prev); From 6da06c6291f38be4df6df2efb76ba925096d2691 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 19 Aug 2020 17:39:44 +0800 Subject: [PATCH 231/290] Revert "RDMA/hns: Reserve one sge in order to avoid local length error" This patch caused some issues on SEND operation, and it should be reverted to make the drivers work correctly. There will be a better solution that has been tested carefully to solve the original problem. This reverts commit 711195e57d341e58133d92cf8aaab1db24e4768d. Fixes: 711195e57d34 ("RDMA/hns: Reserve one sge in order to avoid local length error") Link: https://lore.kernel.org/r/1597829984-20223-1-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 -- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 ++++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 +--- drivers/infiniband/hw/hns/hns_roce_qp.c | 5 ++--- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index da9888deff8c..6edcbdcd8f43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -65,8 +65,6 @@ #define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 #define HNS_ROCE_MIN_CQE_CNT 16 -#define HNS_ROCE_RESERVED_SGE 1 - #define HNS_ROCE_MAX_IRQ_NUM 128 #define HNS_ROCE_SGE_IN_WQE 2 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d2968594664b..4cda95ed1fbe 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -633,7 +633,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - if (unlikely(wr->num_sge >= hr_qp->rq.max_gs)) { + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", wr->num_sge, hr_qp->rq.max_gs); ret = -EINVAL; @@ -653,7 +653,6 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, if (wr->num_sge < hr_qp->rq.max_gs) { dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg->addr = 0; - dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); } /* rq support inline data */ @@ -787,8 +786,8 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } if (wr->num_sge < srq->max_gs) { - dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); - dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg[i].len = 0; + dseg[i].lkey = cpu_to_le32(0x100); dseg[i].addr = 0; } @@ -5070,7 +5069,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) attr->srq_limit = limit_wl; attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs - HNS_ROCE_RESERVED_SGE; + attr->max_sge = srq->max_gs; out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 1fb1c583d0f8..ac29be43b6bd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -92,9 +92,7 @@ #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 -#define HNS_ROCE_INVALID_LKEY 0x0 -#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 - +#define HNS_ROCE_INVALID_LKEY 0x100 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e94ca130ff5e..c063c450c715 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -386,8 +386,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return -EINVAL; } - hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + - HNS_ROCE_RESERVED_SGE); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -402,7 +401,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = cnt; - cap->max_recv_sge = hr_qp->rq.max_gs - HNS_ROCE_RESERVED_SGE; + cap->max_recv_sge = hr_qp->rq.max_gs; return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index f40a000e94ee..b9e2dbd372b6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -297,7 +297,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, spin_lock_init(&srq->lock); srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); - srq->max_gs = init_attr->attr.max_sge + HNS_ROCE_RESERVED_SGE; + srq->max_gs = init_attr->attr.max_sge; if (udata) { ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); From bb175342aa64e6c6f1d04f5235502121d6ff0247 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 20 Aug 2020 11:33:35 +0300 Subject: [PATCH 232/290] io_uring: fix racy req->flags modification Setting and clearing REQ_F_OVERFLOW in io_uring_cancel_files() and io_cqring_overflow_flush() are racy, because they might be called asynchronously. REQ_F_OVERFLOW flag in only needed for files cancellation, so if it can be guaranteed that requests _currently_ marked inflight can't be overflown, the problem will be solved with removing the flag altogether. That's how the patch works, it removes inflight status of a request in io_cqring_fill_event() whenever it should be thrown into CQ-overflow list. That's Ok to do, because no opcode specific handling can be done after io_cqring_fill_event(), the same assumption as with "struct io_completion" patches. And it already have a good place for such cleanups, which is io_clean_op(). A nice side effect of this is removing this inflight check from the hot path. note on synchronisation: now __io_cqring_fill_event() may be taking two spinlocks simultaneously, completion_lock and inflight_lock. It's fine, because we never do that in reverse order, and CQ-overflow of inflight requests shouldn't happen often. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 61 ++++++++++++++------------------------------------- 1 file changed, 17 insertions(+), 44 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1f2f31d93686..d43a20a554e4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -540,7 +540,6 @@ enum { REQ_F_ISREG_BIT, REQ_F_COMP_LOCKED_BIT, REQ_F_NEED_CLEANUP_BIT, - REQ_F_OVERFLOW_BIT, REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, REQ_F_NO_FILE_TABLE_BIT, @@ -583,8 +582,6 @@ enum { REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT), /* needs cleanup */ REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), - /* in overflow list */ - REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT), /* already went through poll handler */ REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), /* buffer already selected */ @@ -946,7 +943,8 @@ static void io_get_req_task(struct io_kiocb *req) static inline void io_clean_op(struct io_kiocb *req) { - if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED)) + if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED | + REQ_F_INFLIGHT)) __io_clean_op(req); } @@ -1366,7 +1364,6 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb, compl.list); list_move(&req->compl.list, &list); - req->flags &= ~REQ_F_OVERFLOW; if (cqe) { WRITE_ONCE(cqe->user_data, req->user_data); WRITE_ONCE(cqe->res, req->result); @@ -1419,7 +1416,6 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; } io_clean_op(req); - req->flags |= REQ_F_OVERFLOW; req->result = res; req->compl.cflags = cflags; refcount_inc(&req->refs); @@ -1563,17 +1559,6 @@ static bool io_dismantle_req(struct io_kiocb *req) if (req->file) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); - if (req->flags & REQ_F_INFLIGHT) { - struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; - - spin_lock_irqsave(&ctx->inflight_lock, flags); - list_del(&req->inflight_entry); - if (waitqueue_active(&ctx->inflight_wait)) - wake_up(&ctx->inflight_wait); - spin_unlock_irqrestore(&ctx->inflight_lock, flags); - } - return io_req_clean_work(req); } @@ -5634,6 +5619,18 @@ static void __io_clean_op(struct io_kiocb *req) } req->flags &= ~REQ_F_NEED_CLEANUP; } + + if (req->flags & REQ_F_INFLIGHT) { + struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->inflight_lock, flags); + list_del(&req->inflight_entry); + if (waitqueue_active(&ctx->inflight_wait)) + wake_up(&ctx->inflight_wait); + spin_unlock_irqrestore(&ctx->inflight_lock, flags); + req->flags &= ~REQ_F_INFLIGHT; + } } static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, @@ -8108,33 +8105,9 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, /* We need to keep going until we don't find a matching req */ if (!cancel_req) break; - - if (cancel_req->flags & REQ_F_OVERFLOW) { - spin_lock_irq(&ctx->completion_lock); - list_del(&cancel_req->compl.list); - cancel_req->flags &= ~REQ_F_OVERFLOW; - - io_cqring_mark_overflow(ctx); - WRITE_ONCE(ctx->rings->cq_overflow, - atomic_inc_return(&ctx->cached_cq_overflow)); - io_commit_cqring(ctx); - spin_unlock_irq(&ctx->completion_lock); - - /* - * Put inflight ref and overflow ref. If that's - * all we had, then we're done with this request. - */ - if (refcount_sub_and_test(2, &cancel_req->refs)) { - io_free_req(cancel_req); - finish_wait(&ctx->inflight_wait, &wait); - continue; - } - } else { - /* cancel this request, or head link requests */ - io_attempt_cancel(ctx, cancel_req); - io_put_req(cancel_req); - } - + /* cancel this request, or head link requests */ + io_attempt_cancel(ctx, cancel_req); + io_put_req(cancel_req); schedule(); finish_wait(&ctx->inflight_wait, &wait); } From f261c16861b82951bd2125706660ce4602930563 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 20 Aug 2020 11:34:10 +0300 Subject: [PATCH 233/290] io_uring: comment on kfree(iovec) checks kfree() handles NULL pointers well, but io_{read,write}() checks it because of performance reasons. Leave a comment there for those who are tempted to patch it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index d43a20a554e4..b9ca5a54dc20 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3204,6 +3204,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, kiocb_done(kiocb, ret, cs); ret = 0; out_free: + /* it's reportedly faster than delegating the null check to kfree() */ if (iovec) kfree(iovec); return ret; @@ -3300,6 +3301,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, return -EAGAIN; } out_free: + /* it's reportedly faster than delegating the null check to kfree() */ if (iovec) kfree(iovec); return ret; From 867a23eab52847d41a0a6eae41a64d76de7782a8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 20 Aug 2020 11:34:39 +0300 Subject: [PATCH 234/290] io_uring: kill extra iovec=NULL in import_iovec() If io_import_iovec() returns an error, return iovec is undefined and must not be used, so don't set it to NULL when failing. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b9ca5a54dc20..91e2cc8414f9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2826,10 +2826,8 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { if (req->flags & REQ_F_BUFFER_SELECT) { buf = io_rw_buffer_select(req, &sqe_len, needs_lock); - if (IS_ERR(buf)) { - *iovec = NULL; + if (IS_ERR(buf)) return PTR_ERR(buf); - } req->rw.len = sqe_len; } From 90a9b102eddf6a3f987d15f4454e26a2532c1c98 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 20 Aug 2020 11:48:44 +0530 Subject: [PATCH 235/290] powerpc/pseries: Do not initiate shutdown when system is running on UPS As per PAPR we have to look for both EPOW sensor value and event modifier to identify the type of event and take appropriate action. In LoPAPR v1.1 section 10.2.2 includes table 136 "EPOW Action Codes": SYSTEM_SHUTDOWN 3 The system must be shut down. An EPOW-aware OS logs the EPOW error log information, then schedules the system to be shut down to begin after an OS defined delay internal (default is 10 minutes.) Then in section 10.3.2.2.8 there is table 146 "Platform Event Log Format, Version 6, EPOW Section", which includes the "EPOW Event Modifier": For EPOW sensor value = 3 0x01 = Normal system shutdown with no additional delay 0x02 = Loss of utility power, system is running on UPS/Battery 0x03 = Loss of system critical functions, system should be shutdown 0x04 = Ambient temperature too high All other values = reserved We have a user space tool (rtas_errd) on LPAR to monitor for EPOW_SHUTDOWN_ON_UPS. Once it gets an event it initiates shutdown after predefined time. It also starts monitoring for any new EPOW events. If it receives "Power restored" event before predefined time it will cancel the shutdown. Otherwise after predefined time it will shutdown the system. Commit 79872e35469b ("powerpc/pseries: All events of EPOW_SYSTEM_SHUTDOWN must initiate shutdown") changed our handling of the "on UPS/Battery" case, to immediately shutdown the system. This breaks existing setups that rely on the userspace tool to delay shutdown and let the system run on the UPS. Fixes: 79872e35469b ("powerpc/pseries: All events of EPOW_SYSTEM_SHUTDOWN must initiate shutdown") Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Vasant Hegde [mpe: Massage change log and add PAPR references] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200820061844.306460-1-hegdevasant@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/ras.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index f3736fcd98fc..13c86a292c6d 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier) case EPOW_SHUTDOWN_ON_UPS: pr_emerg("Loss of system power detected. System is running on" " UPS/battery. Check RTAS error log for details\n"); - orderly_poweroff(true); break; case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: From 1e891e513e16c145cc9b45b1fdb8bf4a4f2f9557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 19 Aug 2020 13:05:34 +0200 Subject: [PATCH 236/290] libbpf: Fix map index used in error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error message emitted by bpf_object__init_user_btf_maps() was using the wrong section ID. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819110534.9058-1-toke@redhat.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5d20b2da4427..0ad0b0491e1f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2264,7 +2264,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, data = elf_getdata(scn, NULL); if (!scn || !data) { pr_warn("failed to get Elf_Data from map section %d (%s)\n", - obj->efile.maps_shndx, MAPS_ELF_SEC); + obj->efile.btf_maps_shndx, MAPS_ELF_SEC); return -EINVAL; } From ba8e42077bbe046a09bdb965dbfbf8c27594fe8f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Aug 2020 14:37:12 +0100 Subject: [PATCH 237/290] afs: Fix key ref leak in afs_put_operation() The afs_put_operation() function needs to put the reference to the key that's authenticating the operation. Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept") Reported-by: Dave Botsch Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/fs_operation.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 24fd163c6323..97cab12b0a6c 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -235,6 +235,7 @@ int afs_put_operation(struct afs_operation *op) afs_end_cursor(&op->ac); afs_put_serverlist(op->net, op->server_list); afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op); + key_put(op->key); kfree(op); return ret; } From cc7f3f72dc2ae2b383142896d79ca1e237ad7e8b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Aug 2020 18:12:48 +0530 Subject: [PATCH 238/290] RISC-V: Add mechanism to provide custom IPI operations We add mechanism to set custom IPI operations so that CLINT driver from drivers directory can provide custom IPI operations. Signed-off-by: Anup Patel Tested-by: Emil Renner Berhing Reviewed-by: Atish Patra Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/clint.h | 25 -------------------- arch/riscv/include/asm/smp.h | 19 +++++++++++++++ arch/riscv/kernel/clint.c | 23 ++++++++++++++++-- arch/riscv/kernel/sbi.c | 14 +++++++++++ arch/riscv/kernel/smp.c | 43 +++++++++++++++++++--------------- arch/riscv/kernel/smpboot.c | 3 +-- 6 files changed, 79 insertions(+), 48 deletions(-) diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h index a279b17a6aad..adaba98a7d6c 100644 --- a/arch/riscv/include/asm/clint.h +++ b/arch/riscv/include/asm/clint.h @@ -6,34 +6,9 @@ #include #ifdef CONFIG_RISCV_M_MODE -extern u32 __iomem *clint_ipi_base; - void clint_init_boot_cpu(void); - -static inline void clint_send_ipi_single(unsigned long hartid) -{ - writel(1, clint_ipi_base + hartid); -} - -static inline void clint_send_ipi_mask(const struct cpumask *mask) -{ - int cpu; - - for_each_cpu(cpu, mask) - clint_send_ipi_single(cpuid_to_hartid_map(cpu)); -} - -static inline void clint_clear_ipi(unsigned long hartid) -{ - writel(0, clint_ipi_base + hartid); -} #else /* CONFIG_RISCV_M_MODE */ #define clint_init_boot_cpu() do { } while (0) - -/* stubs to for code is only reachable under IS_ENABLED(CONFIG_RISCV_M_MODE): */ -void clint_send_ipi_single(unsigned long hartid); -void clint_send_ipi_mask(const struct cpumask *hartid_mask); -void clint_clear_ipi(unsigned long hartid); #endif /* CONFIG_RISCV_M_MODE */ #endif /* _ASM_RISCV_CLINT_H */ diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 6dfd2a1446d5..df1f7c4cd433 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -15,6 +15,11 @@ struct seq_file; extern unsigned long boot_cpu_hartid; +struct riscv_ipi_ops { + void (*ipi_inject)(const struct cpumask *target); + void (*ipi_clear)(void); +}; + #ifdef CONFIG_SMP /* * Mapping between linux logical cpu index and hartid. @@ -40,6 +45,12 @@ void arch_send_call_function_single_ipi(int cpu); int riscv_hartid_to_cpuid(int hartid); void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out); +/* Set custom IPI operations */ +void riscv_set_ipi_ops(struct riscv_ipi_ops *ops); + +/* Clear IPI for current CPU */ +void riscv_clear_ipi(void); + /* Secondary hart entry */ asmlinkage void smp_callin(void); @@ -81,6 +92,14 @@ static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in, cpumask_set_cpu(boot_cpu_hartid, out); } +static inline void riscv_set_ipi_ops(struct riscv_ipi_ops *ops) +{ +} + +static inline void riscv_clear_ipi(void) +{ +} + #endif /* CONFIG_SMP */ #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP) diff --git a/arch/riscv/kernel/clint.c b/arch/riscv/kernel/clint.c index 3647980d14c3..a9845ee023e2 100644 --- a/arch/riscv/kernel/clint.c +++ b/arch/riscv/kernel/clint.c @@ -5,11 +5,11 @@ #include #include +#include #include #include #include #include -#include /* * This is the layout used by the SiFive clint, which is also shared by the qemu @@ -21,6 +21,24 @@ u32 __iomem *clint_ipi_base; +static void clint_send_ipi(const struct cpumask *target) +{ + unsigned int cpu; + + for_each_cpu(cpu, target) + writel(1, clint_ipi_base + cpuid_to_hartid_map(cpu)); +} + +static void clint_clear_ipi(void) +{ + writel(0, clint_ipi_base + cpuid_to_hartid_map(smp_processor_id())); +} + +static struct riscv_ipi_ops clint_ipi_ops = { + .ipi_inject = clint_send_ipi, + .ipi_clear = clint_clear_ipi, +}; + void clint_init_boot_cpu(void) { struct device_node *np; @@ -40,5 +58,6 @@ void clint_init_boot_cpu(void) riscv_time_cmp = base + CLINT_TIME_CMP_OFF; riscv_time_val = base + CLINT_TIME_VAL_OFF; - clint_clear_ipi(boot_cpu_hartid); + clint_clear_ipi(); + riscv_set_ipi_ops(&clint_ipi_ops); } diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f383ef5672b2..226ccce0f9e0 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -547,6 +547,18 @@ static inline long sbi_get_firmware_version(void) return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_VERSION); } +static void sbi_send_cpumask_ipi(const struct cpumask *target) +{ + struct cpumask hartid_mask; + + riscv_cpuid_to_hartid_mask(target, &hartid_mask); + + sbi_send_ipi(cpumask_bits(&hartid_mask)); +} + +static struct riscv_ipi_ops sbi_ipi_ops = { + .ipi_inject = sbi_send_cpumask_ipi +}; int __init sbi_init(void) { @@ -587,5 +599,7 @@ int __init sbi_init(void) __sbi_rfence = __sbi_rfence_v01; } + riscv_set_ipi_ops(&sbi_ipi_ops); + return 0; } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 554b0fb47060..cf9acb43604c 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -86,9 +86,25 @@ static void ipi_stop(void) wait_for_interrupt(); } +static struct riscv_ipi_ops *ipi_ops; + +void riscv_set_ipi_ops(struct riscv_ipi_ops *ops) +{ + ipi_ops = ops; +} +EXPORT_SYMBOL_GPL(riscv_set_ipi_ops); + +void riscv_clear_ipi(void) +{ + if (ipi_ops && ipi_ops->ipi_clear) + ipi_ops->ipi_clear(); + + csr_clear(CSR_IP, IE_SIE); +} +EXPORT_SYMBOL_GPL(riscv_clear_ipi); + static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op) { - struct cpumask hartid_mask; int cpu; smp_mb__before_atomic(); @@ -96,33 +112,22 @@ static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op) set_bit(op, &ipi_data[cpu].bits); smp_mb__after_atomic(); - riscv_cpuid_to_hartid_mask(mask, &hartid_mask); - if (IS_ENABLED(CONFIG_RISCV_SBI)) - sbi_send_ipi(cpumask_bits(&hartid_mask)); + if (ipi_ops && ipi_ops->ipi_inject) + ipi_ops->ipi_inject(mask); else - clint_send_ipi_mask(mask); + pr_warn("SMP: IPI inject method not available\n"); } static void send_ipi_single(int cpu, enum ipi_message_type op) { - int hartid = cpuid_to_hartid_map(cpu); - smp_mb__before_atomic(); set_bit(op, &ipi_data[cpu].bits); smp_mb__after_atomic(); - if (IS_ENABLED(CONFIG_RISCV_SBI)) - sbi_send_ipi(cpumask_bits(cpumask_of(hartid))); + if (ipi_ops && ipi_ops->ipi_inject) + ipi_ops->ipi_inject(cpumask_of(cpu)); else - clint_send_ipi_single(hartid); -} - -static inline void clear_ipi(void) -{ - if (IS_ENABLED(CONFIG_RISCV_SBI)) - csr_clear(CSR_IP, IE_SIE); - else - clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id())); + pr_warn("SMP: IPI inject method not available\n"); } #ifdef CONFIG_IRQ_WORK @@ -140,7 +145,7 @@ void handle_IPI(struct pt_regs *regs) irq_enter(); - clear_ipi(); + riscv_clear_ipi(); while (true) { unsigned long ops; diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 356825a57551..12033d71ba0c 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -147,8 +147,7 @@ asmlinkage __visible void smp_callin(void) struct mm_struct *mm = &init_mm; unsigned int curr_cpuid = smp_processor_id(); - if (!IS_ENABLED(CONFIG_RISCV_SBI)) - clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id())); + riscv_clear_ipi(); /* All kernel threads share the same mm context. */ mmgrab(mm); From 2ac6795fcc085e8d03649f1bbd0d70aaff612cad Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Aug 2020 18:12:49 +0530 Subject: [PATCH 239/290] clocksource/drivers: Add CLINT timer driver We add a separate CLINT timer driver for Linux RISC-V M-mode (i.e. RISC-V NoMMU kernel). The CLINT MMIO device provides three things: 1. 64bit free running counter register 2. 64bit per-CPU time compare registers 3. 32bit per-CPU inter-processor interrupt registers Unlike other timer devices, CLINT provides IPI registers along with timer registers. To use CLINT IPI registers, the CLINT timer driver provides IPI related callbacks to arch/riscv. Signed-off-by: Anup Patel Tested-by: Emil Renner Berhing Acked-by: Daniel Lezcano Reviewed-by: Atish Patra Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- drivers/clocksource/Kconfig | 9 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/timer-clint.c | 226 ++++++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 4 files changed, 237 insertions(+) create mode 100644 drivers/clocksource/timer-clint.c diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 3576ad7bd380..d95cc7234a66 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -663,6 +663,15 @@ config RISCV_TIMER is accessed via both the SBI and the rdcycle instruction. This is required for all RISC-V systems. +config CLINT_TIMER + bool "CLINT Timer for the RISC-V platform" if COMPILE_TEST + depends on GENERIC_SCHED_CLOCK && RISCV + select TIMER_PROBE + select TIMER_OF + help + This option enables the CLINT timer for RISC-V systems. The CLINT + driver is usually used for NoMMU RISC-V systems. + config CSKY_MP_TIMER bool "SMP Timer for the C-SKY platform" if COMPILE_TEST depends on CSKY diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index eaedb7240ae7..1c444cc3bb44 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -89,6 +89,7 @@ obj-$(CONFIG_CLKSRC_ST_LPC) += clksrc_st_lpc.o obj-$(CONFIG_X86_NUMACHIP) += numachip.o obj-$(CONFIG_ATCPIT100_TIMER) += timer-atcpit100.o obj-$(CONFIG_RISCV_TIMER) += timer-riscv.o +obj-$(CONFIG_CLINT_TIMER) += timer-clint.o obj-$(CONFIG_CSKY_MP_TIMER) += timer-mp-csky.o obj-$(CONFIG_GX6605S_TIMER) += timer-gx6605s.o obj-$(CONFIG_HYPERV_TIMER) += hyperv_timer.o diff --git a/drivers/clocksource/timer-clint.c b/drivers/clocksource/timer-clint.c new file mode 100644 index 000000000000..8eeafa82c03d --- /dev/null +++ b/drivers/clocksource/timer-clint.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + * + * Most of the M-mode (i.e. NoMMU) RISC-V systems usually have a + * CLINT MMIO timer device. + */ + +#define pr_fmt(fmt) "clint: " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CLINT_IPI_OFF 0 +#define CLINT_TIMER_CMP_OFF 0x4000 +#define CLINT_TIMER_VAL_OFF 0xbff8 + +/* CLINT manages IPI and Timer for RISC-V M-mode */ +static u32 __iomem *clint_ipi_base; +static u64 __iomem *clint_timer_cmp; +static u64 __iomem *clint_timer_val; +static unsigned long clint_timer_freq; +static unsigned int clint_timer_irq; + +static void clint_send_ipi(const struct cpumask *target) +{ + unsigned int cpu; + + for_each_cpu(cpu, target) + writel(1, clint_ipi_base + cpuid_to_hartid_map(cpu)); +} + +static void clint_clear_ipi(void) +{ + writel(0, clint_ipi_base + cpuid_to_hartid_map(smp_processor_id())); +} + +static struct riscv_ipi_ops clint_ipi_ops = { + .ipi_inject = clint_send_ipi, + .ipi_clear = clint_clear_ipi, +}; + +#ifdef CONFIG_64BIT +#define clint_get_cycles() readq_relaxed(clint_timer_val) +#else +#define clint_get_cycles() readl_relaxed(clint_timer_val) +#define clint_get_cycles_hi() readl_relaxed(((u32 *)clint_timer_val) + 1) +#endif + +#ifdef CONFIG_64BIT +static u64 notrace clint_get_cycles64(void) +{ + return clint_get_cycles(); +} +#else /* CONFIG_64BIT */ +static u64 notrace clint_get_cycles64(void) +{ + u32 hi, lo; + + do { + hi = clint_get_cycles_hi(); + lo = clint_get_cycles(); + } while (hi != clint_get_cycles_hi()); + + return ((u64)hi << 32) | lo; +} +#endif /* CONFIG_64BIT */ + +static u64 clint_rdtime(struct clocksource *cs) +{ + return clint_get_cycles64(); +} + +static struct clocksource clint_clocksource = { + .name = "clint_clocksource", + .rating = 300, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .read = clint_rdtime, +}; + +static int clint_clock_next_event(unsigned long delta, + struct clock_event_device *ce) +{ + void __iomem *r = clint_timer_cmp + + cpuid_to_hartid_map(smp_processor_id()); + + csr_set(CSR_IE, IE_TIE); + writeq_relaxed(clint_get_cycles64() + delta, r); + return 0; +} + +static DEFINE_PER_CPU(struct clock_event_device, clint_clock_event) = { + .name = "clint_clockevent", + .features = CLOCK_EVT_FEAT_ONESHOT, + .rating = 100, + .set_next_event = clint_clock_next_event, +}; + +static int clint_timer_starting_cpu(unsigned int cpu) +{ + struct clock_event_device *ce = per_cpu_ptr(&clint_clock_event, cpu); + + ce->cpumask = cpumask_of(cpu); + clockevents_config_and_register(ce, clint_timer_freq, 100, 0x7fffffff); + + enable_percpu_irq(clint_timer_irq, + irq_get_trigger_type(clint_timer_irq)); + return 0; +} + +static int clint_timer_dying_cpu(unsigned int cpu) +{ + disable_percpu_irq(clint_timer_irq); + return 0; +} + +static irqreturn_t clint_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evdev = this_cpu_ptr(&clint_clock_event); + + csr_clear(CSR_IE, IE_TIE); + evdev->event_handler(evdev); + + return IRQ_HANDLED; +} + +static int __init clint_timer_init_dt(struct device_node *np) +{ + int rc; + u32 i, nr_irqs; + void __iomem *base; + struct of_phandle_args oirq; + + /* + * Ensure that CLINT device interrupts are either RV_IRQ_TIMER or + * RV_IRQ_SOFT. If it's anything else then we ignore the device. + */ + nr_irqs = of_irq_count(np); + for (i = 0; i < nr_irqs; i++) { + if (of_irq_parse_one(np, i, &oirq)) { + pr_err("%pOFP: failed to parse irq %d.\n", np, i); + continue; + } + + if ((oirq.args_count != 1) || + (oirq.args[0] != RV_IRQ_TIMER && + oirq.args[0] != RV_IRQ_SOFT)) { + pr_err("%pOFP: invalid irq %d (hwirq %d)\n", + np, i, oirq.args[0]); + return -ENODEV; + } + + /* Find parent irq domain and map timer irq */ + if (!clint_timer_irq && + oirq.args[0] == RV_IRQ_TIMER && + irq_find_host(oirq.np)) + clint_timer_irq = irq_of_parse_and_map(np, i); + } + + /* If CLINT timer irq not found then fail */ + if (!clint_timer_irq) { + pr_err("%pOFP: timer irq not found\n", np); + return -ENODEV; + } + + base = of_iomap(np, 0); + if (!base) { + pr_err("%pOFP: could not map registers\n", np); + return -ENODEV; + } + + clint_ipi_base = base + CLINT_IPI_OFF; + clint_timer_cmp = base + CLINT_TIMER_CMP_OFF; + clint_timer_val = base + CLINT_TIMER_VAL_OFF; + clint_timer_freq = riscv_timebase; + + pr_info("%pOFP: timer running at %ld Hz\n", np, clint_timer_freq); + + rc = clocksource_register_hz(&clint_clocksource, clint_timer_freq); + if (rc) { + pr_err("%pOFP: clocksource register failed [%d]\n", np, rc); + goto fail_iounmap; + } + + sched_clock_register(clint_get_cycles64, 64, clint_timer_freq); + + rc = request_percpu_irq(clint_timer_irq, clint_timer_interrupt, + "clint-timer", &clint_clock_event); + if (rc) { + pr_err("registering percpu irq failed [%d]\n", rc); + goto fail_iounmap; + } + + rc = cpuhp_setup_state(CPUHP_AP_CLINT_TIMER_STARTING, + "clockevents/clint/timer:starting", + clint_timer_starting_cpu, + clint_timer_dying_cpu); + if (rc) { + pr_err("%pOFP: cpuhp setup state failed [%d]\n", np, rc); + goto fail_free_irq; + } + + riscv_set_ipi_ops(&clint_ipi_ops); + clint_clear_ipi(); + + return 0; + +fail_free_irq: + free_irq(clint_timer_irq, &clint_clock_event); +fail_iounmap: + iounmap(base); + return rc; +} + +TIMER_OF_DECLARE(clint_timer, "riscv,clint0", clint_timer_init_dt); +TIMER_OF_DECLARE(clint_timer1, "sifive,clint0", clint_timer_init_dt); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index a2710e654b64..3215023d4852 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -132,6 +132,7 @@ enum cpuhp_state { CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_RISCV_TIMER_STARTING, + CPUHP_AP_CLINT_TIMER_STARTING, CPUHP_AP_CSKY_TIMER_STARTING, CPUHP_AP_HYPERV_TIMER_STARTING, CPUHP_AP_KVM_STARTING, From 2bc3fc877aa9c4c8b80cc49f66dfcb7e4857a128 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Aug 2020 18:12:50 +0530 Subject: [PATCH 240/290] RISC-V: Remove CLINT related code from timer and arch Right now the RISC-V timer driver is convoluted to support: 1. Linux RISC-V S-mode (with MMU) where it will use TIME CSR for clocksource and SBI timer calls for clockevent device. 2. Linux RISC-V M-mode (without MMU) where it will use CLINT MMIO counter register for clocksource and CLINT MMIO compare register for clockevent device. We now have a separate CLINT timer driver which also provide CLINT based IPI operations so let's remove CLINT MMIO related code from arch/riscv directory and RISC-V timer driver. Signed-off-by: Anup Patel Tested-by: Emil Renner Berhing Acked-by: Daniel Lezcano Reviewed-by: Atish Patra Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- arch/riscv/Kconfig.socs | 2 + arch/riscv/configs/nommu_virt_defconfig | 7 +-- arch/riscv/include/asm/clint.h | 14 ------ arch/riscv/include/asm/timex.h | 28 +++-------- arch/riscv/kernel/Makefile | 2 +- arch/riscv/kernel/clint.c | 63 ------------------------- arch/riscv/kernel/setup.c | 2 - arch/riscv/kernel/smp.c | 1 - arch/riscv/kernel/smpboot.c | 1 - drivers/clocksource/Kconfig | 3 +- drivers/clocksource/timer-riscv.c | 17 +------ 12 files changed, 16 insertions(+), 126 deletions(-) delete mode 100644 arch/riscv/include/asm/clint.h delete mode 100644 arch/riscv/kernel/clint.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7b5905529146..df18372861d8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -81,7 +81,7 @@ config RISCV select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI select RISCV_INTC - select RISCV_TIMER + select RISCV_TIMER if RISCV_SBI select SPARSEMEM_STATIC if 32BIT select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 6c88148f1b9b..8a55f6156661 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -12,6 +12,7 @@ config SOC_SIFIVE config SOC_VIRT bool "QEMU Virt Machine" + select CLINT_TIMER if RISCV_M_MODE select POWER_RESET select POWER_RESET_SYSCON select POWER_RESET_SYSCON_POWEROFF @@ -24,6 +25,7 @@ config SOC_VIRT config SOC_KENDRYTE bool "Kendryte K210 SoC" depends on !MMU + select CLINT_TIMER if RISCV_M_MODE select SERIAL_SIFIVE if TTY select SERIAL_SIFIVE_CONSOLE if TTY select SIFIVE_PLIC diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index f27596e9663e..e046a0babde4 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -26,6 +26,7 @@ CONFIG_EXPERT=y CONFIG_SLOB=y # CONFIG_SLAB_MERGE_DEFAULT is not set # CONFIG_MMU is not set +CONFIG_SOC_VIRT=y CONFIG_MAXPHYSMEM_2GB=y CONFIG_SMP=y CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0" @@ -49,7 +50,6 @@ CONFIG_VIRTIO_BLK=y # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_LDISC_AUTOLOAD is not set -# CONFIG_DEVMEM is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y @@ -57,16 +57,13 @@ CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set +# CONFIG_DEVMEM is not set # CONFIG_HWMON is not set -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set # CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_SIFIVE_PLIC=y -# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT2_FS=y # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h deleted file mode 100644 index adaba98a7d6c..000000000000 --- a/arch/riscv/include/asm/clint.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_RISCV_CLINT_H -#define _ASM_RISCV_CLINT_H 1 - -#include -#include - -#ifdef CONFIG_RISCV_M_MODE -void clint_init_boot_cpu(void); -#else /* CONFIG_RISCV_M_MODE */ -#define clint_init_boot_cpu() do { } while (0) -#endif /* CONFIG_RISCV_M_MODE */ - -#endif /* _ASM_RISCV_CLINT_H */ diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index bad2a7c2cda5..a3fb85d505d4 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -7,41 +7,27 @@ #define _ASM_RISCV_TIMEX_H #include -#include typedef unsigned long cycles_t; -extern u64 __iomem *riscv_time_val; -extern u64 __iomem *riscv_time_cmp; - -#ifdef CONFIG_64BIT -#define mmio_get_cycles() readq_relaxed(riscv_time_val) -#else -#define mmio_get_cycles() readl_relaxed(riscv_time_val) -#define mmio_get_cycles_hi() readl_relaxed(((u32 *)riscv_time_val) + 1) -#endif - static inline cycles_t get_cycles(void) { - if (IS_ENABLED(CONFIG_RISCV_SBI)) - return csr_read(CSR_TIME); - return mmio_get_cycles(); + return csr_read(CSR_TIME); } #define get_cycles get_cycles +static inline u32 get_cycles_hi(void) +{ + return csr_read(CSR_TIMEH); +} +#define get_cycles_hi get_cycles_hi + #ifdef CONFIG_64BIT static inline u64 get_cycles64(void) { return get_cycles(); } #else /* CONFIG_64BIT */ -static inline u32 get_cycles_hi(void) -{ - if (IS_ENABLED(CONFIG_RISCV_SBI)) - return csr_read(CSR_TIMEH); - return mmio_get_cycles_hi(); -} - static inline u64 get_cycles64(void) { u32 hi, lo; diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index a5287ab9f7f2..dc93710f0b2f 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -31,7 +31,7 @@ obj-y += cacheinfo.o obj-y += patch.o obj-$(CONFIG_MMU) += vdso.o vdso/ -obj-$(CONFIG_RISCV_M_MODE) += clint.o traps_misaligned.o +obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/riscv/kernel/clint.c b/arch/riscv/kernel/clint.c deleted file mode 100644 index a9845ee023e2..000000000000 --- a/arch/riscv/kernel/clint.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2019 Christoph Hellwig. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * This is the layout used by the SiFive clint, which is also shared by the qemu - * virt platform, and the Kendryte KD210 at least. - */ -#define CLINT_IPI_OFF 0 -#define CLINT_TIME_CMP_OFF 0x4000 -#define CLINT_TIME_VAL_OFF 0xbff8 - -u32 __iomem *clint_ipi_base; - -static void clint_send_ipi(const struct cpumask *target) -{ - unsigned int cpu; - - for_each_cpu(cpu, target) - writel(1, clint_ipi_base + cpuid_to_hartid_map(cpu)); -} - -static void clint_clear_ipi(void) -{ - writel(0, clint_ipi_base + cpuid_to_hartid_map(smp_processor_id())); -} - -static struct riscv_ipi_ops clint_ipi_ops = { - .ipi_inject = clint_send_ipi, - .ipi_clear = clint_clear_ipi, -}; - -void clint_init_boot_cpu(void) -{ - struct device_node *np; - void __iomem *base; - - np = of_find_compatible_node(NULL, NULL, "riscv,clint0"); - if (!np) { - panic("clint not found"); - return; - } - - base = of_iomap(np, 0); - if (!base) - panic("could not map CLINT"); - - clint_ipi_base = base + CLINT_IPI_OFF; - riscv_time_cmp = base + CLINT_TIME_CMP_OFF; - riscv_time_val = base + CLINT_TIME_VAL_OFF; - - clint_clear_ipi(); - riscv_set_ipi_ops(&clint_ipi_ops); -} diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index f04373be54a6..2c6dd329312b 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -79,7 +78,6 @@ void __init setup_arch(char **cmdline_p) #else unflatten_device_tree(); #endif - clint_init_boot_cpu(); #ifdef CONFIG_SWIOTLB swiotlb_init(1); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index cf9acb43604c..ea028d9e0d24 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 12033d71ba0c..96167d55ed98 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index d95cc7234a66..68b087bff59c 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -653,9 +653,8 @@ config ATCPIT100_TIMER This option enables support for the Andestech ATCPIT100 timers. config RISCV_TIMER - bool "Timer for the RISC-V platform" + bool "Timer for the RISC-V platform" if COMPILE_TEST depends on GENERIC_SCHED_CLOCK && RISCV - default y select TIMER_PROBE select TIMER_OF help diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 9de1dabfb126..c51c5ed15aa7 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -19,26 +19,13 @@ #include #include #include - -u64 __iomem *riscv_time_cmp; -u64 __iomem *riscv_time_val; - -static inline void mmio_set_timer(u64 val) -{ - void __iomem *r; - - r = riscv_time_cmp + cpuid_to_hartid_map(smp_processor_id()); - writeq_relaxed(val, r); -} +#include static int riscv_clock_next_event(unsigned long delta, struct clock_event_device *ce) { csr_set(CSR_IE, IE_TIE); - if (IS_ENABLED(CONFIG_RISCV_SBI)) - sbi_set_timer(get_cycles64() + delta); - else - mmio_set_timer(get_cycles64() + delta); + sbi_set_timer(get_cycles64() + delta); return 0; } From a2770b57d0838148b6d0c8167214dce15c3645a3 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Aug 2020 18:12:51 +0530 Subject: [PATCH 241/290] dt-bindings: timer: Add CLINT bindings We add DT bindings documentation for CLINT device. Signed-off-by: Anup Patel Reviewed-by: Palmer Dabbelt Tested-by: Emil Renner Berhing Reviewed-by: Atish Patra Reviewed-by: Rob Herring Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- .../bindings/timer/sifive,clint.yaml | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 Documentation/devicetree/bindings/timer/sifive,clint.yaml diff --git a/Documentation/devicetree/bindings/timer/sifive,clint.yaml b/Documentation/devicetree/bindings/timer/sifive,clint.yaml new file mode 100644 index 000000000000..2a0e9cd9fbcf --- /dev/null +++ b/Documentation/devicetree/bindings/timer/sifive,clint.yaml @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/timer/sifive,clint.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: SiFive Core Local Interruptor + +maintainers: + - Palmer Dabbelt + - Anup Patel + +description: + SiFive (and other RISC-V) SOCs include an implementation of the SiFive + Core Local Interruptor (CLINT) for M-mode timer and M-mode inter-processor + interrupts. It directly connects to the timer and inter-processor interrupt + lines of various HARTs (or CPUs) so RISC-V per-HART (or per-CPU) local + interrupt controller is the parent interrupt controller for CLINT device. + The clock frequency of CLINT is specified via "timebase-frequency" DT + property of "/cpus" DT node. The "timebase-frequency" DT property is + described in Documentation/devicetree/bindings/riscv/cpus.yaml + +properties: + compatible: + items: + - const: sifive,fu540-c000-clint + - const: sifive,clint0 + + description: + Should be "sifive,-clint" and "sifive,clint". + Supported compatible strings are - + "sifive,fu540-c000-clint" for the SiFive CLINT v0 as integrated + onto the SiFive FU540 chip, and "sifive,clint0" for the SiFive + CLINT v0 IP block with no chip integration tweaks. + Please refer to sifive-blocks-ip-versioning.txt for details + + reg: + maxItems: 1 + + interrupts-extended: + minItems: 1 + +additionalProperties: false + +required: + - compatible + - reg + - interrupts-extended + +examples: + - | + timer@2000000 { + compatible = "sifive,fu540-c000-clint", "sifive,clint0"; + interrupts-extended = <&cpu1intc 3 &cpu1intc 7 + &cpu2intc 3 &cpu2intc 7 + &cpu3intc 3 &cpu3intc 7 + &cpu4intc 3 &cpu4intc 7>; + reg = <0x2000000 0x10000>; + }; +... From fc26f5bbf19459930b7290c87b65a9ae6a274650 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 15 Jul 2020 21:39:53 -0700 Subject: [PATCH 242/290] riscv: Add SiFive drivers to rv32_defconfig This adds SiFive drivers to rv32_defconfig, to keep in sync with the 64-bit config. This is useful when testing 32-bit kernel with QEMU 'sifive_u' 32-bit machine. Signed-off-by: Bin Meng Reviewed-by: Anup Patel Reviewed-by: Alistair Francis Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/rv32_defconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 3a55f0e00d6c..2c2cda6cc1c5 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -14,6 +14,7 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_SOC_SIFIVE=y CONFIG_SOC_VIRT=y CONFIG_ARCH_RV32I=y CONFIG_SMP=y @@ -62,6 +63,8 @@ CONFIG_HVC_RISCV_SBI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_SPI=y +CONFIG_SPI_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y CONFIG_DRM=y @@ -77,6 +80,8 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y +CONFIG_MMC=y +CONFIG_MMC_SPI=y CONFIG_RTC_CLASS=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_BALLOON=y From fb73ed5ef7e1f3b561dae517b9efdf58c153636b Mon Sep 17 00:00:00 2001 From: Kaige Li Date: Thu, 20 Aug 2020 14:47:55 +0800 Subject: [PATCH 243/290] net: phy: mscc: Fix a couple of spelling mistakes "spcified" -> "specified" There are a couple of spelling mistakes in comment text. Fix these. Signed-off-by: Kaige Li Signed-off-by: David S. Miller --- drivers/net/phy/mscc/mscc_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index a4fbf3a4fa97..6bc7406a1ce7 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -1738,13 +1738,13 @@ static int __phy_write_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb, return 0; } -/* Trigger a read to the spcified MCB */ +/* Trigger a read to the specified MCB */ static int phy_update_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb) { return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_READ); } -/* Trigger a write to the spcified MCB */ +/* Trigger a write to the specified MCB */ static int phy_commit_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb) { return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_WRITE); From 3e659a82c45076e354d20db4b0776e716c6e7fe3 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 20 Aug 2020 11:47:19 +0100 Subject: [PATCH 244/290] sfc: fix build warnings on 32-bit Truncation of DMA_BIT_MASK to 32-bit dma_addr_t is semantically safe, but the compiler was warning because it was happening implicitly. Insert explicit casts to suppress the warnings. Reported-by: Randy Dunlap Signed-off-by: Edward Cree Acked-by: Randy Dunlap # build-tested Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef100.c b/drivers/net/ethernet/sfc/ef100.c index 9729983f4840..c54b7f8243f3 100644 --- a/drivers/net/ethernet/sfc/ef100.c +++ b/drivers/net/ethernet/sfc/ef100.c @@ -142,7 +142,7 @@ static int ef100_pci_parse_continue_entry(struct efx_nic *efx, int entry_locatio /* Temporarily map new BAR. */ rc = efx_init_io(efx, bar, - DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), + (dma_addr_t)DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), pci_resource_len(efx->pci_dev, bar)); if (rc) { netif_err(efx, probe, efx->net_dev, @@ -160,7 +160,7 @@ static int ef100_pci_parse_continue_entry(struct efx_nic *efx, int entry_locatio /* Put old BAR back. */ rc = efx_init_io(efx, previous_bar, - DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), + (dma_addr_t)DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), pci_resource_len(efx->pci_dev, previous_bar)); if (rc) { netif_err(efx, probe, efx->net_dev, @@ -334,7 +334,7 @@ static int ef100_pci_parse_xilinx_cap(struct efx_nic *efx, int vndr_cap, /* Temporarily map BAR. */ rc = efx_init_io(efx, bar, - DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), + (dma_addr_t)DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), pci_resource_len(efx->pci_dev, bar)); if (rc) { netif_err(efx, probe, efx->net_dev, @@ -495,7 +495,7 @@ static int ef100_pci_probe(struct pci_dev *pci_dev, /* Set up basic I/O (BAR mappings etc) */ rc = efx_init_io(efx, fcw.bar, - DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), + (dma_addr_t)DMA_BIT_MASK(ESF_GZ_TX_SEND_ADDR_WIDTH), pci_resource_len(efx->pci_dev, fcw.bar)); if (rc) goto fail; From ce51f63e63c52a4e1eee4dd040fb0ba0af3b43ab Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 20 Aug 2020 16:30:52 +0200 Subject: [PATCH 245/290] net/smc: Prevent kernel-infoleak in __smc_diag_dump() __smc_diag_dump() is potentially copying uninitialized kernel stack memory into socket buffers, since the compiler may leave a 4-byte hole near the beginning of `struct smcd_diag_dmbinfo`. Fix it by initializing `dinfo` with memset(). Fixes: 4b1b7d3b30a6 ("net/smc: add SMC-D diag support") Suggested-by: Dan Carpenter Signed-off-by: Peilin Ye Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_diag.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index e1f64f4ba236..da9ba6d1679b 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -170,13 +170,15 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_connection *conn = &smc->conn; - struct smcd_diag_dmbinfo dinfo = { - .linkid = *((u32 *)conn->lgr->id), - .peer_gid = conn->lgr->peer_gid, - .my_gid = conn->lgr->smcd->local_gid, - .token = conn->rmb_desc->token, - .peer_token = conn->peer_token - }; + struct smcd_diag_dmbinfo dinfo; + + memset(&dinfo, 0, sizeof(dinfo)); + + dinfo.linkid = *((u32 *)conn->lgr->id); + dinfo.peer_gid = conn->lgr->peer_gid; + dinfo.my_gid = conn->lgr->smcd->local_gid; + dinfo.token = conn->rmb_desc->token; + dinfo.peer_token = conn->peer_token; if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0) goto errout; From 51f6463aacfbfd322bcaadc606da56acef644b05 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Aug 2020 11:23:42 +0200 Subject: [PATCH 246/290] tools/resolve_btfids: Fix sections with wrong alignment The data of compressed section should be aligned to 4 (for 32bit) or 8 (for 64 bit) bytes. The binutils ld sets sh_addralign to 1, which makes libelf fail with misaligned section error during the update as reported by Jesper: FAILED elf_update(WRITE): invalid section alignment While waiting for ld fix, we can fix compressed sections sh_addralign value manually. Adding warning in -vv mode when the fix is triggered: $ ./tools/bpf/resolve_btfids/resolve_btfids -vv vmlinux ... section(36) .comment, size 44, link 0, flags 30, type=1 section(37) .debug_aranges, size 45684, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 16, expected 8 section(38) .debug_info, size 129104957, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(39) .debug_abbrev, size 1152583, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(40) .debug_line, size 7374522, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(41) .debug_frame, size 702463, link 0, flags 800, type=1 section(42) .debug_str, size 1017571, link 0, flags 830, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(43) .debug_loc, size 3019453, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(44) .debug_ranges, size 1744583, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 16, expected 8 section(45) .symtab, size 2955888, link 46, flags 0, type=2 section(46) .strtab, size 2613072, link 0, flags 0, type=3 ... update ok for vmlinux Another workaround is to disable compressed debug info data CONFIG_DEBUG_INFO_COMPRESSED kernel option. Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") Reported-by: Jesper Dangaard Brouer Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Acked-by: Yonghong Song Cc: Mark Wielaard Cc: Nick Clifton Link: https://lore.kernel.org/bpf/20200819092342.259004-1-jolsa@kernel.org --- tools/bpf/resolve_btfids/main.c | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 4d9ecb975862..0def0bb1f783 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -233,6 +233,39 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) return btf_id__add(root, id, false); } +/* + * The data of compressed section should be aligned to 4 + * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld + * sets sh_addralign to 1, which makes libelf fail with + * misaligned section error during the update: + * FAILED elf_update(WRITE): invalid section alignment + * + * While waiting for ld fix, we fix the compressed sections + * sh_addralign value manualy. + */ +static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh) +{ + int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8; + + if (!(sh->sh_flags & SHF_COMPRESSED)) + return 0; + + if (sh->sh_addralign == expected) + return 0; + + pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n", + sh->sh_addralign, expected); + + sh->sh_addralign = expected; + + if (gelf_update_shdr(scn, sh) == 0) { + printf("FAILED cannot update section header: %s\n", + elf_errmsg(-1)); + return -1; + } + return 0; +} + static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; @@ -309,6 +342,9 @@ static int elf_collect(struct object *obj) obj->efile.idlist_shndx = idx; obj->efile.idlist_addr = sh.sh_addr; } + + if (compressed_section_fix(elf, scn, &sh)) + return -1; } return 0; From 5597432dde62befd3ab92e6ef9e073564e277ea8 Mon Sep 17 00:00:00 2001 From: Veronika Kabatova Date: Wed, 19 Aug 2020 18:07:10 +0200 Subject: [PATCH 247/290] selftests/bpf: Remove test_align leftovers Calling generic selftests "make install" fails as rsync expects all files from TEST_GEN_PROGS to be present. The binary is not generated anymore (commit 3b09d27cc93d) so we can safely remove it from there and also from gitignore. Fixes: 3b09d27cc93d ("selftests/bpf: Move test_align under test_progs") Signed-off-by: Veronika Kabatova Signed-off-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20200819160710.1345956-1-vkabatov@redhat.com --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 1bb204cee853..9a0946ddb705 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -6,7 +6,6 @@ test_lpm_map test_tag FEATURE-DUMP.libbpf fixdep -test_align test_dev_cgroup /test_progs* test_tcpbpf_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a83b5827532f..fc946b7ac288 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ + test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \ From c8a36f1945b2b1b3f9823b66fc2181dc069cf803 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 22:28:41 -0700 Subject: [PATCH 248/290] bpf: xdp: Fix XDP mode when no mode flags specified 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") inadvertently changed which XDP mode is assumed when no mode flags are specified explicitly. Previously, driver mode was preferred, if driver supported it. If not, generic SKB mode was chosen. That commit changed default to SKB mode always. This patch fixes the issue and restores the original logic. Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Reported-by: Lorenzo Bianconi Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Tested-by: Lorenzo Bianconi Link: https://lore.kernel.org/bpf/20200820052841.1559757-1-andriin@fb.com --- net/core/dev.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index b5d1129d8310..d42c9ea0c3c0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8742,13 +8742,15 @@ struct bpf_xdp_link { int flags; }; -static enum bpf_xdp_mode dev_xdp_mode(u32 flags) +static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags) { if (flags & XDP_FLAGS_HW_MODE) return XDP_MODE_HW; if (flags & XDP_FLAGS_DRV_MODE) return XDP_MODE_DRV; - return XDP_MODE_SKB; + if (flags & XDP_FLAGS_SKB_MODE) + return XDP_MODE_SKB; + return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB; } static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) @@ -8896,7 +8898,7 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack return -EINVAL; } - mode = dev_xdp_mode(flags); + mode = dev_xdp_mode(dev, flags); /* can't replace attached link */ if (dev_xdp_link(dev, mode)) { NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); @@ -8984,7 +8986,7 @@ static int dev_xdp_detach_link(struct net_device *dev, ASSERT_RTNL(); - mode = dev_xdp_mode(link->flags); + mode = dev_xdp_mode(dev, link->flags); if (dev_xdp_link(dev, mode) != link) return -EINVAL; @@ -9080,7 +9082,7 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } - mode = dev_xdp_mode(xdp_link->flags); + mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, xdp_link->flags, new_prog); @@ -9164,7 +9166,7 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags) { - enum bpf_xdp_mode mode = dev_xdp_mode(flags); + enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags); struct bpf_prog *new_prog = NULL, *old_prog = NULL; int err; From c210773d6c6f595f5922d56b7391fe343bc7310e Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Thu, 20 Aug 2020 14:58:43 +0300 Subject: [PATCH 249/290] bpf: selftests: global_funcs: Check err_str before strstr The error path in libbpf.c:load_program() has calls to pr_warn() which ends up for global_funcs tests to test_global_funcs.c:libbpf_debug_print(). For the tests with no struct test_def::err_str initialized with a string, it causes call of strstr() with NULL as the second argument and it segfaults. Fix it by calling strstr() only for non-NULL err_str. Signed-off-by: Yauheni Kaliuta Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820115843.39454-1-yauheni.kaliuta@redhat.com --- tools/testing/selftests/bpf/prog_tests/test_global_funcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 25b068591e9a..193002b14d7f 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -19,7 +19,7 @@ static int libbpf_debug_print(enum libbpf_print_level level, log_buf = va_arg(args, char *); if (!log_buf) goto out; - if (strstr(log_buf, err_str) == 0) + if (err_str && strstr(log_buf, err_str) == 0) found = true; out: printf(format, log_buf); From 4d820543c54c47a2bd3c95ddbf52f83c89a219a0 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 20 Aug 2020 14:53:14 -0700 Subject: [PATCH 250/290] hv_netvsc: Remove "unlikely" from netvsc_select_queue When using vf_ops->ndo_select_queue, the number of queues of VF is usually bigger than the synthetic NIC. This condition may happen often. Remove "unlikely" from the comparison of ndev->real_num_tx_queues. Fixes: b3bf5666a510 ("hv_netvsc: defer queue selection to VF") Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 787f17e2a971..0029292cdb9f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -367,7 +367,7 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, } rcu_read_unlock(); - while (unlikely(txq >= ndev->real_num_tx_queues)) + while (txq >= ndev->real_num_tx_queues) txq -= ndev->real_num_tx_queues; return txq; From c3d897e01aef8ddc43149e4d661b86f823e3aae7 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 20 Aug 2020 14:53:15 -0700 Subject: [PATCH 251/290] hv_netvsc: Fix the queue_mapping in netvsc_vf_xmit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit netvsc_vf_xmit() / dev_queue_xmit() will call VF NIC’s ndo_select_queue or netdev_pick_tx() again. They will use skb_get_rx_queue() to get the queue number, so the “skb->queue_mapping - 1” will be used. This may cause the last queue of VF not been used. Use skb_record_rx_queue() here, so that the skb_get_rx_queue() called later will get the correct queue number, and VF will be able to use all queues. Fixes: b3bf5666a510 ("hv_netvsc: defer queue selection to VF") Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 0029292cdb9f..64b0a74c1523 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -502,7 +502,7 @@ static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev, int rc; skb->dev = vf_netdev; - skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; + skb_record_rx_queue(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping); rc = dev_queue_xmit(skb); if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) { From 272502fcb7cda01ab07fc2fcff82d1d2f73d43cc Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Wed, 19 Aug 2020 13:53:58 +1200 Subject: [PATCH 252/290] gre6: Fix reception with IP6_TNL_F_RCV_DSCP_COPY When receiving an IPv4 packet inside an IPv6 GRE packet, and the IP6_TNL_F_RCV_DSCP_COPY flag is set on the tunnel, the IPv4 header would get corrupted. This is due to the common ip6_tnl_rcv() function assuming that the inner header is always IPv6. This patch checks the tunnel protocol for IPv4 inner packets, but still defaults to IPv6. Fixes: 308edfdf1563 ("gre6: Cleanup GREv6 receive path, call common GRE functions") Signed-off-by: Mark Tomlinson Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f635914f42ec..a0217e5bf3bc 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -915,7 +915,15 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, struct metadata_dst *tun_dst, bool log_ecn_err) { - return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate, + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb); + + dscp_ecn_decapsulate = ip6ip6_dscp_ecn_decapsulate; + if (tpi->proto == htons(ETH_P_IP)) + dscp_ecn_decapsulate = ip4ip6_dscp_ecn_decapsulate; + + return __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_err); } EXPORT_SYMBOL(ip6_tnl_rcv); From 41506bff84f1563e20e505ca9a0366a30ae2a879 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Aug 2020 14:45:39 +0200 Subject: [PATCH 253/290] dt-bindings: net: renesas, ether: Improve schema validation - Remove pinctrl consumer properties, as they are handled by core dt-schema, - Document missing properties, - Document missing PHY child node, - Add "additionalProperties: false". Signed-off-by: Geert Uytterhoeven Reviewed-by: Rob Herring Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- .../bindings/net/renesas,ether.yaml | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml index 08678af5ed93..8ce5ed8a58dd 100644 --- a/Documentation/devicetree/bindings/net/renesas,ether.yaml +++ b/Documentation/devicetree/bindings/net/renesas,ether.yaml @@ -59,9 +59,15 @@ properties: clocks: maxItems: 1 - pinctrl-0: true + power-domains: + maxItems: 1 - pinctrl-names: true + resets: + maxItems: 1 + + phy-mode: true + + phy-handle: true renesas,no-ether-link: type: boolean @@ -74,6 +80,11 @@ properties: specify when the Ether LINK signal is active-low instead of normal active-high +patternProperties: + "^ethernet-phy@[0-9a-f]$": + type: object + $ref: ethernet-phy.yaml# + required: - compatible - reg @@ -83,7 +94,8 @@ required: - '#address-cells' - '#size-cells' - clocks - - pinctrl-0 + +additionalProperties: false examples: # Lager board @@ -99,8 +111,6 @@ examples: clocks = <&mstp8_clks R8A7790_CLK_ETHER>; phy-mode = "rmii"; phy-handle = <&phy1>; - pinctrl-0 = <ðer_pins>; - pinctrl-names = "default"; renesas,ether-link-active-low; #address-cells = <1>; #size-cells = <0>; @@ -109,7 +119,5 @@ examples: reg = <1>; interrupt-parent = <&irqc0>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; - pinctrl-0 = <&phy1_pins>; - pinctrl-names = "default"; }; }; From ab921f3cdbec01c68705a7ade8bec628d541fc2b Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 19 Aug 2020 14:40:52 +0000 Subject: [PATCH 254/290] net: sctp: Fix negotiation of the number of data streams. The number of output and input streams was never being reduced, eg when processing received INIT or INIT_ACK chunks. The effect is that DATA chunks can be sent with invalid stream ids and then discarded by the remote system. Fixes: 2075e50caf5ea ("sctp: convert to genradix") Signed-off-by: David Laight Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index bda2536dd740..6dc95dcc0ff4 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -88,12 +88,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, int ret; if (outcnt <= stream->outcnt) - return 0; + goto out; ret = genradix_prealloc(&stream->out, outcnt, gfp); if (ret) return ret; +out: stream->outcnt = outcnt; return 0; } @@ -104,12 +105,13 @@ static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, int ret; if (incnt <= stream->incnt) - return 0; + goto out; ret = genradix_prealloc(&stream->in, incnt, gfp); if (ret) return ret; +out: stream->incnt = incnt; return 0; } From eda814b97dfb8d9f4808eb2f65af9bd3705c4cae Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Wed, 19 Aug 2020 18:24:10 +0300 Subject: [PATCH 255/290] net/sched: act_ct: Fix skb double-free in tcf_ct_handle_fragments() error flow tcf_ct_handle_fragments() shouldn't free the skb when ip_defrag() call fails. Otherwise, we will cause a double-free bug. In such cases, just return the error to the caller. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: Alaa Hleihel Reviewed-by: Roi Dayan Signed-off-by: David S. Miller --- net/sched/act_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index e6ad42b11835..2c3619165680 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -704,7 +704,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, err = ip_defrag(net, skb, user); local_bh_enable(); if (err && err != -EINPROGRESS) - goto out_free; + return err; if (!err) { *defrag = true; From f6db9096416209474090d64d8284e7c16c3d8873 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 20 Aug 2020 15:34:47 +0800 Subject: [PATCH 256/290] tipc: call rcu_read_lock() in tipc_aead_encrypt_done() b->media->send_msg() requires rcu_read_lock(), as we can see elsewhere in tipc, tipc_bearer_xmit, tipc_bearer_xmit_skb and tipc_bearer_bc_xmit(). Syzbot has reported this issue as: net/tipc/bearer.c:466 suspicious rcu_dereference_check() usage! Workqueue: cryptd cryptd_queue_worker Call Trace: tipc_l2_send_msg+0x354/0x420 net/tipc/bearer.c:466 tipc_aead_encrypt_done+0x204/0x3a0 net/tipc/crypto.c:761 cryptd_aead_crypt+0xe8/0x1d0 crypto/cryptd.c:739 cryptd_queue_worker+0x118/0x1b0 crypto/cryptd.c:181 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:291 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 So fix it by calling rcu_read_lock() in tipc_aead_encrypt_done() for b->media->send_msg(). Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Reported-by: syzbot+47bbc6b678d317cccbe0@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/crypto.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 001bcb0f2480..c38babaa4e57 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -757,10 +757,12 @@ static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err) switch (err) { case 0: this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]); + rcu_read_lock(); if (likely(test_bit(0, &b->up))) b->media->send_msg(net, skb, b, &tx_ctx->dst); else kfree_skb(skb); + rcu_read_unlock(); break; case -EINPROGRESS: return; From 53efe2e76ca2bfad7f35e0b5330e2ccd44a643e3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Aug 2020 00:16:46 +0900 Subject: [PATCH 257/290] kconfig: qconf: remove qInfo() to get back Qt4 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qconf is supposed to work with Qt4 and Qt5, but since commit c4f7398bee9c ("kconfig: qconf: make debug links work again"), building with Qt4 fails as follows: HOSTCXX scripts/kconfig/qconf.o scripts/kconfig/qconf.cc: In member function ‘void ConfigInfoView::clicked(const QUrl&)’: scripts/kconfig/qconf.cc:1241:3: error: ‘qInfo’ was not declared in this scope; did you mean ‘setInfo’? 1241 | qInfo() << "Clicked link is empty"; | ^~~~~ | setInfo scripts/kconfig/qconf.cc:1254:3: error: ‘qInfo’ was not declared in this scope; did you mean ‘setInfo’? 1254 | qInfo() << "Clicked symbol is invalid:" << data; | ^~~~~ | setInfo make[1]: *** [scripts/Makefile.host:129: scripts/kconfig/qconf.o] Error 1 make: *** [Makefile:606: xconfig] Error 2 qInfo() does not exist in Qt4. In my understanding, these call-sites should be unreachable. Perhaps, qWarning(), assertion, or something is better, but qInfo() is not the right one to use here, I think. Fixes: c4f7398bee9c ("kconfig: qconf: make debug links work again") Reported-by: Ronald Warsow Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 5a0aa159ec80..3a8f6251a727 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1238,7 +1238,6 @@ void ConfigInfoView::clicked(const QUrl &url) struct menu *m = NULL; if (count < 1) { - qInfo() << "Clicked link is empty"; delete[] data; return; } @@ -1251,7 +1250,6 @@ void ConfigInfoView::clicked(const QUrl &url) strcat(data, "$"); result = sym_re_search(data); if (!result) { - qInfo() << "Clicked symbol is invalid:" << data; delete[] data; return; } From 68fd110b3e7e2c90d781a132cc4362fbfb4a95a1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Aug 2020 02:43:27 +0900 Subject: [PATCH 258/290] kconfig: qconf: remove redundant help in the info view The same information is repeated in the info view. Remove the second one. Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 3a8f6251a727..4c5688a845f8 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1106,11 +1106,6 @@ void ConfigInfoView::menuInfo(void) if (showDebug()) debug = debug_info(sym); - - struct gstr help_gstr = str_new(); - menu_get_ext_help(_menu, &help_gstr); - help = print_filter(str_get(&help_gstr)); - str_free(&help_gstr); } else if (_menu->prompt) { head += ""; head += print_filter(_menu->prompt->text); @@ -1126,7 +1121,7 @@ void ConfigInfoView::menuInfo(void) if (showDebug()) debug += QString().sprintf("defined at %s:%d

", _menu->file->name, _menu->lineno); - setText(head + debug + help); + setText(head + debug); } QString ConfigInfoView::debug_info(struct symbol *sym) From 510bc3cb1ddc32f9533e6ed0a68c980544c3ca3f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Aug 2020 02:43:28 +0900 Subject: [PATCH 259/290] kconfig: qconf: replace deprecated QString::sprintf() with QTextStream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QString::sprintf() is deprecated in the latest Qt version, and spawns a lot of warnings: HOSTCXX scripts/kconfig/qconf.o scripts/kconfig/qconf.cc: In member function ‘void ConfigInfoView::menuInfo()’: scripts/kconfig/qconf.cc:1090:61: warning: ‘QString& QString::sprintf(const char*, ...)’ is deprecated: Use asprintf(), arg() or QTextStream instead [-Wdeprecated-declarations] 1090 | head += QString().sprintf("", sym->name); | ^ In file included from /usr/include/qt5/QtGui/qkeysequence.h:44, from /usr/include/qt5/QtWidgets/qaction.h:44, from /usr/include/qt5/QtWidgets/QAction:1, from scripts/kconfig/qconf.cc:7: /usr/include/qt5/QtCore/qstring.h:382:14: note: declared here 382 | QString &sprintf(const char *format, ...) Q_ATTRIBUTE_FORMAT_PRINTF(2, 3); | ^~~~~~~ scripts/kconfig/qconf.cc:1099:60: warning: ‘QString& QString::sprintf(const char*, ...)’ is deprecated: Use asprintf(), arg() or QTextStream instead [-Wdeprecated-declarations] 1099 | head += QString().sprintf("", sym->name); | ^ In file included from /usr/include/qt5/QtGui/qkeysequence.h:44, from /usr/include/qt5/QtWidgets/qaction.h:44, from /usr/include/qt5/QtWidgets/QAction:1, from scripts/kconfig/qconf.cc:7: /usr/include/qt5/QtCore/qstring.h:382:14: note: declared here 382 | QString &sprintf(const char *format, ...) Q_ATTRIBUTE_FORMAT_PRINTF(2, 3); | ^~~~~~~ scripts/kconfig/qconf.cc:1127:90: warning: ‘QString& QString::sprintf(const char*, ...)’ is deprecated: Use asprintf(), arg() or QTextStream instead [-Wdeprecated-declarations] 1127 | debug += QString().sprintf("defined at %s:%d

", _menu->file->name, _menu->lineno); | ^ In file included from /usr/include/qt5/QtGui/qkeysequence.h:44, from /usr/include/qt5/QtWidgets/qaction.h:44, from /usr/include/qt5/QtWidgets/QAction:1, from scripts/kconfig/qconf.cc:7: /usr/include/qt5/QtCore/qstring.h:382:14: note: declared here 382 | QString &sprintf(const char *format, ...) Q_ATTRIBUTE_FORMAT_PRINTF(2, 3); | ^~~~~~~ scripts/kconfig/qconf.cc: In member function ‘QString ConfigInfoView::debug_info(symbol*)’: scripts/kconfig/qconf.cc:1150:68: warning: ‘QString& QString::sprintf(const char*, ...)’ is deprecated: Use asprintf(), arg() or QTextStream instead [-Wdeprecated-declarations] 1150 | debug += QString().sprintf("prompt:
", sym->name); | ^ In file included from /usr/include/qt5/QtGui/qkeysequence.h:44, from /usr/include/qt5/QtWidgets/qaction.h:44, from /usr/include/qt5/QtWidgets/QAction:1, from scripts/kconfig/qconf.cc:7: /usr/include/qt5/QtCore/qstring.h:382:14: note: declared here 382 | QString &sprintf(const char *format, ...) Q_ATTRIBUTE_FORMAT_PRINTF(2, 3); | ^~~~~~~ scripts/kconfig/qconf.cc: In static member function ‘static void ConfigInfoView::expr_print_help(void*, symbol*, const char*)’: scripts/kconfig/qconf.cc:1225:59: warning: ‘QString& QString::sprintf(const char*, ...)’ is deprecated: Use asprintf(), arg() or QTextStream instead [-Wdeprecated-declarations] 1225 | *text += QString().sprintf("", sym->name); | ^ In file included from /usr/include/qt5/QtGui/qkeysequence.h:44, from /usr/include/qt5/QtWidgets/qaction.h:44, from /usr/include/qt5/QtWidgets/QAction:1, from scripts/kconfig/qconf.cc:7: /usr/include/qt5/QtCore/qstring.h:382:14: note: declared here 382 | QString &sprintf(const char *format, ...) Q_ATTRIBUTE_FORMAT_PRINTF(2, 3); | ^~~~~~~ The documentation also says: "Warning: We do not recommend using QString::asprintf() in new Qt code. Instead, consider using QTextStream or arg(), both of which support Unicode strings seamlessly and are type-safe." Use QTextStream as suggested. Reported-by: Robert Crawford Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 116 +++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 54 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 4c5688a845f8..8638785328a7 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1076,75 +1076,80 @@ void ConfigInfoView::symbolInfo(void) void ConfigInfoView::menuInfo(void) { struct symbol* sym; - QString head, debug, help; + QString info; + QTextStream stream(&info); sym = _menu->sym; if (sym) { if (_menu->prompt) { - head += ""; - head += print_filter(_menu->prompt->text); - head += ""; + stream << ""; + stream << print_filter(_menu->prompt->text); + stream << ""; if (sym->name) { - head += " ("; + stream << " ("; if (showDebug()) - head += QString().sprintf("", sym->name); - head += print_filter(sym->name); + stream << "name << "\">"; + stream << print_filter(sym->name); if (showDebug()) - head += ""; - head += ")"; + stream << ""; + stream << ")"; } } else if (sym->name) { - head += ""; + stream << ""; if (showDebug()) - head += QString().sprintf("", sym->name); - head += print_filter(sym->name); + stream << "name << "\">"; + stream << print_filter(sym->name); if (showDebug()) - head += ""; - head += ""; + stream << ""; + stream << ""; } - head += "

"; + stream << "

"; if (showDebug()) - debug = debug_info(sym); + stream << debug_info(sym); + } else if (_menu->prompt) { - head += ""; - head += print_filter(_menu->prompt->text); - head += "

"; + stream << ""; + stream << print_filter(_menu->prompt->text); + stream << "

"; if (showDebug()) { if (_menu->prompt->visible.expr) { - debug += "  dep: "; - expr_print(_menu->prompt->visible.expr, expr_print_help, &debug, E_NONE); - debug += "

"; + stream << "  dep: "; + expr_print(_menu->prompt->visible.expr, + expr_print_help, &stream, E_NONE); + stream << "

"; } } } if (showDebug()) - debug += QString().sprintf("defined at %s:%d

", _menu->file->name, _menu->lineno); + stream << "defined at " << _menu->file->name << ":" + << _menu->lineno << "

"; - setText(head + debug); + setText(info); } QString ConfigInfoView::debug_info(struct symbol *sym) { QString debug; + QTextStream stream(&debug); - debug += "type: "; - debug += print_filter(sym_type_name(sym->type)); + stream << "type: "; + stream << print_filter(sym_type_name(sym->type)); if (sym_is_choice(sym)) - debug += " (choice)"; + stream << " (choice)"; debug += "
"; if (sym->rev_dep.expr) { - debug += "reverse dep: "; - expr_print(sym->rev_dep.expr, expr_print_help, &debug, E_NONE); - debug += "
"; + stream << "reverse dep: "; + expr_print(sym->rev_dep.expr, expr_print_help, &stream, E_NONE); + stream << "
"; } for (struct property *prop = sym->prop; prop; prop = prop->next) { switch (prop->type) { case P_PROMPT: case P_MENU: - debug += QString().sprintf("prompt: ", sym->name); - debug += print_filter(prop->text); - debug += "
"; + stream << "prompt: name << "\">"; + stream << print_filter(prop->text); + stream << "
"; break; case P_DEFAULT: case P_SELECT: @@ -1152,30 +1157,33 @@ QString ConfigInfoView::debug_info(struct symbol *sym) case P_COMMENT: case P_IMPLY: case P_SYMBOL: - debug += prop_get_type_name(prop->type); - debug += ": "; - expr_print(prop->expr, expr_print_help, &debug, E_NONE); - debug += "
"; + stream << prop_get_type_name(prop->type); + stream << ": "; + expr_print(prop->expr, expr_print_help, + &stream, E_NONE); + stream << "
"; break; case P_CHOICE: if (sym_is_choice(sym)) { - debug += "choice: "; - expr_print(prop->expr, expr_print_help, &debug, E_NONE); - debug += "
"; + stream << "choice: "; + expr_print(prop->expr, expr_print_help, + &stream, E_NONE); + stream << "
"; } break; default: - debug += "unknown property: "; - debug += prop_get_type_name(prop->type); - debug += "
"; + stream << "unknown property: "; + stream << prop_get_type_name(prop->type); + stream << "
"; } if (prop->visible.expr) { - debug += "    dep: "; - expr_print(prop->visible.expr, expr_print_help, &debug, E_NONE); - debug += "
"; + stream << "    dep: "; + expr_print(prop->visible.expr, expr_print_help, + &stream, E_NONE); + stream << "
"; } } - debug += "
"; + stream << "
"; return debug; } @@ -1213,15 +1221,15 @@ QString ConfigInfoView::print_filter(const QString &str) void ConfigInfoView::expr_print_help(void *data, struct symbol *sym, const char *str) { - QString* text = reinterpret_cast(data); - QString str2 = print_filter(str); + QTextStream *stream = reinterpret_cast(data); if (sym && sym->name && !(sym->flags & SYMBOL_CONST)) { - *text += QString().sprintf("", sym->name); - *text += str2; - *text += ""; - } else - *text += str2; + *stream << "name << "\">"; + *stream << print_filter(str); + *stream << ""; + } else { + *stream << print_filter(str); + } } void ConfigInfoView::clicked(const QUrl &url) From d49f7d7376d0c0daf8680984a37bd07581ac7d38 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 31 Jul 2020 18:38:23 +0100 Subject: [PATCH 260/290] arm64: Move handling of erratum 1418040 into C code Instead of dealing with erratum 1418040 on each entry and exit, let's move the handling to __switch_to() instead, which has several advantages: - It can be applied when it matters (switching between 32 and 64 bit tasks). - It is written in C (yay!) - It can rely on static keys rather than alternatives Signed-off-by: Marc Zyngier Tested-by: Sai Prakash Ranjan Reviewed-by: Stephen Boyd Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200731173824.107480-2-maz@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 21 --------------------- arch/arm64/kernel/process.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2646178c8329..55af8b504b65 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -170,19 +170,6 @@ alternative_cb_end stp x28, x29, [sp, #16 * 14] .if \el == 0 - .if \regsize == 32 - /* - * If we're returning from a 32-bit task on a system affected by - * 1418040 then re-enable userspace access to the virtual counter. - */ -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if ARM64_WORKAROUND_1418040 - mrs x0, cntkctl_el1 - orr x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x0 -alternative_else_nop_endif -#endif - .endif clear_gp_regs mrs x21, sp_el0 ldr_this_cpu tsk, __entry_task, x20 @@ -294,14 +281,6 @@ alternative_else_nop_endif tst x22, #PSR_MODE32_BIT // native task? b.eq 3f -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if ARM64_WORKAROUND_1418040 - mrs x0, cntkctl_el1 - bic x0, x0, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x0 -alternative_else_nop_endif -#endif - #ifdef CONFIG_ARM64_ERRATUM_845719 alternative_if ARM64_WORKAROUND_845719 #ifdef CONFIG_PID_IN_CONTEXTIDR diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 84ec630b8ab5..b63ce4c54cfe 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -515,6 +515,39 @@ static void entry_task_switch(struct task_struct *next) __this_cpu_write(__entry_task, next); } +/* + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT. + * Assuming the virtual counter is enabled at the beginning of times: + * + * - disable access when switching from a 64bit task to a 32bit task + * - enable access when switching from a 32bit task to a 64bit task + */ +static void erratum_1418040_thread_switch(struct task_struct *prev, + struct task_struct *next) +{ + bool prev32, next32; + u64 val; + + if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) && + cpus_have_const_cap(ARM64_WORKAROUND_1418040))) + return; + + prev32 = is_compat_thread(task_thread_info(prev)); + next32 = is_compat_thread(task_thread_info(next)); + + if (prev32 == next32) + return; + + val = read_sysreg(cntkctl_el1); + + if (!next32) + val |= ARCH_TIMER_USR_VCT_ACCESS_EN; + else + val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN; + + write_sysreg(val, cntkctl_el1); +} + /* * Thread switching. */ @@ -530,6 +563,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); uao_thread_switch(next); ssbs_thread_switch(next); + erratum_1418040_thread_switch(prev, next); /* * Complete any pending TLB or cache maintenance on this CPU in case From bf87bb0881d0f59181fe3bbcf29c609f36483ff8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 31 Jul 2020 18:38:24 +0100 Subject: [PATCH 261/290] arm64: Allow booting of late CPUs affected by erratum 1418040 As we can now switch from a system that isn't affected by 1418040 to a system that globally is affected, let's allow affected CPUs to come in at a later time. Signed-off-by: Marc Zyngier Tested-by: Sai Prakash Ranjan Reviewed-by: Stephen Boyd Reviewed-by: Suzuki K Poulose Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200731173824.107480-3-maz@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6bd1d3ad037a..c332d49780dc 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -910,6 +910,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM erratum 1418040", .capability = ARM64_WORKAROUND_1418040, ERRATA_MIDR_RANGE_LIST(erratum_1418040_list), + .type = (ARM64_CPUCAP_SCOPE_LOCAL_CPU | + ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU), }, #endif #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT From abf532cceaca9c21a148498091f87de1b8ae9b49 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 3 Aug 2020 13:31:25 -0600 Subject: [PATCH 262/290] KVM: arm64: Print warning when cpu erratum can cause guests to deadlock If guests don't have certain CPU erratum workarounds implemented, then there is a possibility a guest can deadlock the system. IOW, only trusted guests should be used on systems with the erratum. This is the case for Cortex-A57 erratum 832075. Signed-off-by: Rob Herring Acked-by: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: Julien Thierry Cc: Suzuki K Poulose Cc: Will Deacon Cc: kvmarm@lists.cs.columbia.edu Link: https://lore.kernel.org/r/20200803193127.3012242-2-robh@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kvm/arm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 691d21e4c717..46dc3d75cf13 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1640,6 +1640,10 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)) + kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ + "Only trusted guests should be used on this system.\n"); + for_each_online_cpu(cpu) { smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); if (ret < 0) { From 541cebb51f3422d4f2c6cb95c1e5cc3dcc9e5021 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 21 Aug 2020 07:15:25 +0000 Subject: [PATCH 263/290] powerpc/32s: Fix module loading failure when VMALLOC_END is over 0xf0000000 In is_module_segment(), when VMALLOC_END is over 0xf0000000, ALIGN(VMALLOC_END, SZ_256M) has value 0. In that case, addr >= ALIGN(VMALLOC_END, SZ_256M) is always true then is_module_segment() always returns false. Use (ALIGN(VMALLOC_END, SZ_256M) - 1) which will have value 0xffffffff and will be suitable for the comparison. Fixes: c49643319715 ("powerpc/32s: Only leave NX unset on segments used for modules") Reported-by: Andreas Schwab Signed-off-by: Christophe Leroy Tested-by: Andreas Schwab Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/09fc73fe9c7423c6b4cf93f93df9bb0ed8eefab5.1597994047.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 82ae9e06a773..d426eaf76bb0 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -194,12 +194,12 @@ static bool is_module_segment(unsigned long addr) #ifdef MODULES_VADDR if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) return false; - if (addr >= ALIGN(MODULES_END, SZ_256M)) + if (addr > ALIGN(MODULES_END, SZ_256M) - 1) return false; #else if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) return false; - if (addr >= ALIGN(VMALLOC_END, SZ_256M)) + if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1) return false; #endif return true; From 64ef8f2c4791940d7f3945507b6a45c20d959260 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 21 Aug 2020 13:36:10 +0530 Subject: [PATCH 264/290] powerpc/perf/hv-24x7: Move cpumask file to top folder of hv-24x7 driver Commit 792f73f747b8 ("powerpc/hv-24x7: Add sysfs files inside hv-24x7 device to show cpumask") added cpumask file as part of hv-24x7 driver inside the interface folder. The cpumask file is supposed to be in the top folder of the PMU driver in order to make hotplug work. This patch fixes that issue and creates new group 'cpumask_attr_group' to add cpumask file and make sure it added in top folder. command:# cat /sys/devices/hv_24x7/cpumask 0 Fixes: 792f73f747b8 ("powerpc/hv-24x7: Add sysfs files inside hv-24x7 device to show cpumask") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200821080610.123997-1-kjain@linux.ibm.com --- .../testing/sysfs-bus-event_source-devices-hv_24x7 | 2 +- arch/powerpc/perf/hv-24x7.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 index f7e32f218f73..e82fc37be802 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 @@ -43,7 +43,7 @@ Description: read only This sysfs interface exposes the number of cores per chip present in the system. -What: /sys/devices/hv_24x7/interface/cpumask +What: /sys/devices/hv_24x7/cpumask Date: July 2020 Contact: Linux on PowerPC Developer List Description: read only diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index cdb7bfbd157e..6e7e820508df 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1128,6 +1128,15 @@ static struct bin_attribute *if_bin_attrs[] = { NULL, }; +static struct attribute *cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = cpumask_attrs, +}; + static struct attribute *if_attrs[] = { &dev_attr_catalog_len.attr, &dev_attr_catalog_version.attr, @@ -1135,7 +1144,6 @@ static struct attribute *if_attrs[] = { &dev_attr_sockets.attr, &dev_attr_chipspersocket.attr, &dev_attr_coresperchip.attr, - &dev_attr_cpumask.attr, NULL, }; @@ -1151,6 +1159,7 @@ static const struct attribute_group *attr_groups[] = { &event_desc_group, &event_long_desc_group, &if_group, + &cpumask_attr_group, NULL, }; From f6da70d99c96256f8be0cbb4dd72d45d622c7823 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 10 Aug 2020 12:10:59 +0300 Subject: [PATCH 265/290] MAINTAINERS: Update Mellanox and Cumulus Network addresses to new domain Mellanox and Cumulus Network were acquired by Nvidia, so change the maintainers emails to new domain name. Link: https://lore.kernel.org/r/20200810091100.243932-1-leon@kernel.org Signed-off-by: Leon Romanovsky Acked-by: Andy Shevchenko Signed-off-by: Jason Gunthorpe --- .mailmap | 2 ++ MAINTAINERS | 58 ++++++++++++++++++++++++++--------------------------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/.mailmap b/.mailmap index 57fe0085b465..3ce5583d3d4b 100644 --- a/.mailmap +++ b/.mailmap @@ -128,6 +128,7 @@ Jan Glauber Jan Glauber Jan Glauber Jason Gunthorpe +Jason Gunthorpe Jason Gunthorpe Javi Merino @@ -172,6 +173,7 @@ Krzysztof Kozlowski Kuninori Morimoto Leon Romanovsky Leon Romanovsky +Leon Romanovsky Leonardo Bras Leonid I Ananiev Linas Vepstas diff --git a/MAINTAINERS b/MAINTAINERS index deaafb617361..f0068bceeb61 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5050,7 +5050,7 @@ F: include/linux/dm-*.h F: include/uapi/linux/dm-*.h DEVLINK -M: Jiri Pirko +M: Jiri Pirko L: netdev@vger.kernel.org S: Supported F: Documentation/networking/devlink @@ -6081,7 +6081,7 @@ F: include/linux/dynamic_debug.h F: lib/dynamic_debug.c DYNAMIC INTERRUPT MODERATION -M: Tal Gilboa +M: Tal Gilboa S: Maintained F: Documentation/networking/net_dim.rst F: include/linux/dim.h @@ -6161,7 +6161,7 @@ F: Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt F: drivers/edac/aspeed_edac.c EDAC-BLUEFIELD -M: Shravan Kumar Ramani +M: Shravan Kumar Ramani S: Supported F: drivers/edac/bluefield_edac.c @@ -6483,8 +6483,8 @@ S: Odd Fixes F: drivers/net/ethernet/agere/ ETHERNET BRIDGE -M: Roopa Prabhu -M: Nikolay Aleksandrov +M: Roopa Prabhu +M: Nikolay Aleksandrov L: bridge@lists.linux-foundation.org (moderated for non-subscribers) L: netdev@vger.kernel.org S: Maintained @@ -6599,7 +6599,7 @@ F: drivers/iommu/exynos-iommu.c EZchip NPS platform support M: Vineet Gupta -M: Ofer Levi +M: Ofer Levi S: Supported F: arch/arc/boot/dts/eznps.dts F: arch/arc/plat-eznps @@ -8563,7 +8563,7 @@ F: drivers/iio/pressure/dps310.c INFINIBAND SUBSYSTEM M: Doug Ledford -M: Jason Gunthorpe +M: Jason Gunthorpe L: linux-rdma@vger.kernel.org S: Supported W: https://github.com/linux-rdma/rdma-core @@ -9226,7 +9226,7 @@ F: drivers/firmware/iscsi_ibft* ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR M: Sagi Grimberg -M: Max Gurtovoy +M: Max Gurtovoy L: linux-rdma@vger.kernel.org S: Supported W: http://www.openfabrics.org @@ -11072,7 +11072,7 @@ F: Documentation/devicetree/bindings/input/touchscreen/melfas_mip4.txt F: drivers/input/touchscreen/melfas_mip4.c MELLANOX ETHERNET DRIVER (mlx4_en) -M: Tariq Toukan +M: Tariq Toukan L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -11080,7 +11080,7 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlx4/en_* MELLANOX ETHERNET DRIVER (mlx5e) -M: Saeed Mahameed +M: Saeed Mahameed L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -11088,7 +11088,7 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ F: drivers/net/ethernet/mellanox/mlx5/core/en_* MELLANOX ETHERNET INNOVA DRIVERS -R: Boris Pismenny +R: Boris Pismenny L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -11099,8 +11099,8 @@ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/* F: include/linux/mlx5/mlx5_ifc_fpga.h MELLANOX ETHERNET SWITCH DRIVERS -M: Jiri Pirko -M: Ido Schimmel +M: Jiri Pirko +M: Ido Schimmel L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -11109,7 +11109,7 @@ F: drivers/net/ethernet/mellanox/mlxsw/ F: tools/testing/selftests/drivers/net/mlxsw/ MELLANOX FIRMWARE FLASH LIBRARY (mlxfw) -M: mlxsw@mellanox.com +M: mlxsw@nvidia.com L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -11119,7 +11119,7 @@ F: drivers/net/ethernet/mellanox/mlxfw/ MELLANOX HARDWARE PLATFORM SUPPORT M: Andy Shevchenko M: Darren Hart -M: Vadim Pasternak +M: Vadim Pasternak L: platform-driver-x86@vger.kernel.org S: Supported F: Documentation/ABI/testing/sysfs-platform-mellanox-bootctl @@ -11127,7 +11127,7 @@ F: drivers/platform/mellanox/ F: include/linux/platform_data/mlxreg.h MELLANOX MLX4 core VPI driver -M: Tariq Toukan +M: Tariq Toukan L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Supported @@ -11137,7 +11137,7 @@ F: drivers/net/ethernet/mellanox/mlx4/ F: include/linux/mlx4/ MELLANOX MLX4 IB driver -M: Yishai Hadas +M: Yishai Hadas L: linux-rdma@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -11147,8 +11147,8 @@ F: include/linux/mlx4/ F: include/uapi/rdma/mlx4-abi.h MELLANOX MLX5 core VPI driver -M: Saeed Mahameed -M: Leon Romanovsky +M: Saeed Mahameed +M: Leon Romanovsky L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Supported @@ -11159,7 +11159,7 @@ F: drivers/net/ethernet/mellanox/mlx5/core/ F: include/linux/mlx5/ MELLANOX MLX5 IB driver -M: Leon Romanovsky +M: Leon Romanovsky L: linux-rdma@vger.kernel.org S: Supported W: http://www.mellanox.com @@ -11169,8 +11169,8 @@ F: include/linux/mlx5/ F: include/uapi/rdma/mlx5-abi.h MELLANOX MLXCPLD I2C AND MUX DRIVER -M: Vadim Pasternak -M: Michael Shych +M: Vadim Pasternak +M: Michael Shych L: linux-i2c@vger.kernel.org S: Supported F: Documentation/i2c/busses/i2c-mlxcpld.rst @@ -11178,7 +11178,7 @@ F: drivers/i2c/busses/i2c-mlxcpld.c F: drivers/i2c/muxes/i2c-mux-mlxcpld.c MELLANOX MLXCPLD LED DRIVER -M: Vadim Pasternak +M: Vadim Pasternak L: linux-leds@vger.kernel.org S: Supported F: Documentation/leds/leds-mlxcpld.rst @@ -11186,7 +11186,7 @@ F: drivers/leds/leds-mlxcpld.c F: drivers/leds/leds-mlxreg.c MELLANOX PLATFORM DRIVER -M: Vadim Pasternak +M: Vadim Pasternak L: platform-driver-x86@vger.kernel.org S: Supported F: drivers/platform/x86/mlx-platform.c @@ -12167,8 +12167,8 @@ F: net/ipv6/syncookies.c F: net/ipv6/tcp*.c NETWORKING [TLS] -M: Boris Pismenny -M: Aviad Yehezkel +M: Boris Pismenny +M: Aviad Yehezkel M: John Fastabend M: Daniel Borkmann M: Jakub Kicinski @@ -12468,7 +12468,7 @@ S: Supported F: drivers/nfc/nxp-nci OBJAGG -M: Jiri Pirko +M: Jiri Pirko L: netdev@vger.kernel.org S: Supported F: include/linux/objagg.h @@ -13110,7 +13110,7 @@ F: drivers/video/logo/logo_parisc* F: include/linux/hp_sdc.h PARMAN -M: Jiri Pirko +M: Jiri Pirko L: netdev@vger.kernel.org S: Supported F: include/linux/parman.h @@ -16034,7 +16034,7 @@ F: drivers/infiniband/sw/siw/ F: include/uapi/rdma/siw-abi.h SOFT-ROCE DRIVER (rxe) -M: Zhu Yanjun +M: Zhu Yanjun L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/sw/rxe/ From 6a3ea3e68b8a8a26c4aaac03432ed92269c9a14e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Aug 2020 06:52:29 -0400 Subject: [PATCH 266/290] x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM KVM has an optmization to avoid expensive MRS read/writes on VMENTER/EXIT. It caches the MSR values and restores them either when leaving the run loop, on preemption or when going out to user space. The affected MSRs are not required for kernel context operations. This changed with the recently introduced mechanism to handle FSGSBASE in the paranoid entry code which has to retrieve the kernel GSBASE value by accessing per CPU memory. The mechanism needs to retrieve the CPU number and uses either LSL or RDPID if the processor supports it. Unfortunately RDPID uses MSR_TSC_AUX which is in the list of cached and lazily restored MSRs, which means between the point where the guest value is written and the point of restore, MSR_TSC_AUX contains a random number. If an NMI or any other exception which uses the paranoid entry path happens in such a context, then RDPID returns the random guest MSR_TSC_AUX value. As a consequence this reads from the wrong memory location to retrieve the kernel GSBASE value. Kernel GS is used to for all regular this_cpu_*() operations. If the GSBASE in the exception handler points to the per CPU memory of a different CPU then this has the obvious consequences of data corruption and crashes. As the paranoid entry path is the only place which accesses MSR_TSX_AUX (via RDPID) and the fallback via LSL is not significantly slower, remove the RDPID alternative from the entry path and always use LSL. The alternative would be to write MSR_TSC_AUX on every VMENTER and VMEXIT which would be inflicting massive overhead on that code path. [ tglx: Rewrote changelog ] Fixes: eaad981291ee3 ("x86/entry/64: Introduce the FIND_PERCPU_BASE macro") Reported-by: Tom Lendacky Debugged-by: Tom Lendacky Suggested-by: Andy Lutomirski Suggested-by: Peter Zijlstra Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200821105229.18938-1-pbonzini@redhat.com --- arch/x86/entry/calling.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 98e4d8886f11..ae9b0d4615b3 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -374,12 +374,14 @@ For 32-bit we have the following conventions - kernel is built with * Fetch the per-CPU GSBASE value for this processor and put it in @reg. * We normally use %gs for accessing per-CPU data, but we are setting up * %gs here and obviously can not use %gs itself to access per-CPU data. + * + * Do not use RDPID, because KVM loads guest's TSC_AUX on vm-entry and + * may not restore the host's value until the CPU returns to userspace. + * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives + * while running KVM's run loop. */ .macro GET_PERCPU_BASE reg:req - ALTERNATIVE \ - "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \ - "RDPID \reg", \ - X86_FEATURE_RDPID + LOAD_CPU_AND_NODE_SEG_LIMIT \reg andq $VDSO_CPUNODE_MASK, \reg movq __per_cpu_offset(, \reg, 8), \reg .endm From d88d59b64ca35abae208e2781fdb45e69cbed56c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Aug 2020 21:44:39 +0200 Subject: [PATCH 267/290] core/entry: Respect syscall number rewrites The transcript of the x86 entry code to the generic version failed to reload the syscall number from ptregs after ptrace and seccomp have run, which both can modify the syscall number in ptregs. It returns the original syscall number instead which is obviously not the right thing to do. Reload the syscall number to fix that. Fixes: 142781e108b1 ("entry: Provide generic syscall entry functionality") Reported-by: Kyle Huey Signed-off-by: Thomas Gleixner Tested-by: Kyle Huey Tested-by: Kees Cook Acked-by: Kees Cook Link: https://lore.kernel.org/r/87blj6ifo8.fsf@nanos.tec.linutronix.de --- kernel/entry/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 9852e0d62d95..fcae019158ca 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -65,7 +65,8 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, syscall_enter_audit(regs, syscall); - return ret ? : syscall; + /* The above might have changed the syscall number */ + return ret ? : syscall_get_nr(current, regs); } noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) From 00c54a80cd36de5a0089e7ffd0a690c3a93e65d0 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 20 Aug 2020 17:41:56 -0700 Subject: [PATCH 268/290] mailmap: add Andi Kleen I keep getting bounce back from the suse.de address. Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Cc: Andi Kleen Cc: Jonathan Corbet Cc: Kees Cook Cc: Quentin Perret Link: http://lkml.kernel.org/r/20200818203214.659955-1-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 97fd835bd2a4..21f965400a28 100644 --- a/.mailmap +++ b/.mailmap @@ -32,6 +32,7 @@ Alex Shi Alex Shi Al Viro Al Viro +Andi Kleen Andi Shyti Andreas Herrmann Andrew Morton From d5a1695977cf60d3f7b387f4d18bd2fb2a9a7a01 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Thu, 20 Aug 2020 17:41:59 -0700 Subject: [PATCH 269/290] hugetlb_cgroup: convert comma to semicolon Replace a comma between expression statements by a semicolon. Fixes: faced7e0806cf4 ("mm: hugetlb controller for cgroups v2") Signed-off-by: Xu Wang Signed-off-by: Andrew Morton Cc: Tejun Heo Cc: Giuseppe Scrivano Link: http://lkml.kernel.org/r/20200818064333.21759-1-vulab@iscas.ac.cn Signed-off-by: Linus Torvalds --- mm/hugetlb_cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index aabf65d4d91b..1f87aec9ab5c 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -655,7 +655,7 @@ static void __init __hugetlb_cgroup_file_dfl_init(int idx) snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf); cft->private = MEMFILE_PRIVATE(idx, 0); cft->seq_show = hugetlb_events_show; - cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]), + cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]); cft->flags = CFTYPE_NOT_ON_ROOT; /* Add the events.local file */ @@ -664,7 +664,7 @@ static void __init __hugetlb_cgroup_file_dfl_init(int idx) cft->private = MEMFILE_PRIVATE(idx, 0); cft->seq_show = hugetlb_events_local_show; cft->file_offset = offsetof(struct hugetlb_cgroup, - events_local_file[idx]), + events_local_file[idx]); cft->flags = CFTYPE_NOT_ON_ROOT; /* NULL terminate the last cft */ From f3f99d63a8156c7a4a6b20aac22b53c5579c7dc1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 20 Aug 2020 17:42:02 -0700 Subject: [PATCH 270/290] khugepaged: adjust VM_BUG_ON_MM() in __khugepaged_enter() syzbot crashes on the VM_BUG_ON_MM(khugepaged_test_exit(mm), mm) in __khugepaged_enter(): yes, when one thread is about to dump core, has set core_state, and is waiting for others, another might do something calling __khugepaged_enter(), which now crashes because I lumped the core_state test (known as "mmget_still_valid") into khugepaged_test_exit(). I still think it's best to lump them together, so just in this exceptional case, check mm->mm_users directly instead of khugepaged_test_exit(). Fixes: bbe98f9cadff ("khugepaged: khugepaged_test_exit() check mmget_still_valid()") Reported-by: syzbot Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Acked-by: Yang Shi Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Song Liu Cc: Mike Kravetz Cc: Eric Dumazet Cc: [4.8+] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008141503370.18085@eggly.anvils Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 15a9af791014..e749e568e1ea 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -466,7 +466,7 @@ int __khugepaged_enter(struct mm_struct *mm) return -ENOMEM; /* __khugepaged_exit() must not run from under us */ - VM_BUG_ON_MM(khugepaged_test_exit(mm), mm); + VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm); if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) { free_mm_slot(mm_slot); return 0; From e47110e90584a22e9980510b00d0dfad3a83354e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 20 Aug 2020 17:42:05 -0700 Subject: [PATCH 271/290] mm/vunmap: add cond_resched() in vunmap_pmd_range Like zap_pte_range add cond_resched so that we can avoid softlockups as reported below. On non-preemptible kernel with large I/O map region (like the one we get when using persistent memory with sector mode), an unmap of the namespace can report below softlockups. 22724.027334] watchdog: BUG: soft lockup - CPU#49 stuck for 23s! [ndctl:50777] NIP [c0000000000dc224] plpar_hcall+0x38/0x58 LR [c0000000000d8898] pSeries_lpar_hpte_invalidate+0x68/0xb0 Call Trace: flush_hash_page+0x114/0x200 hpte_need_flush+0x2dc/0x540 vunmap_page_range+0x538/0x6f0 free_unmap_vmap_area+0x30/0x70 remove_vm_area+0xfc/0x140 __vunmap+0x68/0x270 __iounmap.part.0+0x34/0x60 memunmap+0x54/0x70 release_nodes+0x28c/0x300 device_release_driver_internal+0x16c/0x280 unbind_store+0x124/0x170 drv_attr_store+0x44/0x60 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1b0/0x290 __vfs_write+0x3c/0x70 vfs_write+0xd8/0x260 ksys_write+0xdc/0x130 system_call+0x5c/0x70 Reported-by: Harish Sriram Signed-off-by: Aneesh Kumar K.V Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Link: http://lkml.kernel.org/r/20200807075933.310240-1-aneesh.kumar@linux.ibm.com Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b482d240f9a2..be4724b916b3 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -104,6 +104,8 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, if (pmd_none_or_clear_bad(pmd)) continue; vunmap_pte_range(pmd, addr, next, mask); + + cond_resched(); } while (pmd++, addr = next, addr != end); } From 86f54bb7e4ff22ad5dd0b1e179610cc8bbb3bd63 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 20 Aug 2020 17:42:08 -0700 Subject: [PATCH 272/290] mm/rodata_test.c: fix missing function declaration The compilation with CONFIG_DEBUG_RODATA_TEST set produces the following warning due to the missing include. mm/rodata_test.c:15:6: warning: no previous prototype for 'rodata_test' [-Wmissing-prototypes] 15 | void rodata_test(void) | ^~~~~~~~~~~ Fixes: 2959a5f726f6 ("mm: add arch-independent testcases for RODATA") Signed-off-by: Leon Romanovsky Signed-off-by: Andrew Morton Reviewed-by: Anshuman Khandual Link: https://lkml.kernel.org/r/20200819080026.918134-1-leon@kernel.org Signed-off-by: Linus Torvalds --- mm/rodata_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/rodata_test.c b/mm/rodata_test.c index 2a99df7beeb3..2613371945b7 100644 --- a/mm/rodata_test.c +++ b/mm/rodata_test.c @@ -7,6 +7,7 @@ */ #define pr_fmt(fmt) "rodata_test: " fmt +#include #include #include From bcf85fcedfdd17911982a3e3564fcfec7b01eebd Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 20 Aug 2020 17:42:11 -0700 Subject: [PATCH 273/290] romfs: fix uninitialized memory leak in romfs_dev_read() romfs has a superblock field that limits the size of the filesystem; data beyond that limit is never accessed. romfs_dev_read() fetches a caller-supplied number of bytes from the backing device. It returns 0 on success or an error code on failure; therefore, its API can't represent short reads, it's all-or-nothing. However, when romfs_dev_read() detects that the requested operation would cross the filesystem size limit, it currently silently truncates the requested number of bytes. This e.g. means that when the content of a file with size 0x1000 starts one byte before the filesystem size limit, ->readpage() will only fill a single byte of the supplied page while leaving the rest uninitialized, leaking that uninitialized memory to userspace. Fix it by returning an error code instead of truncating the read when the requested read operation would go beyond the end of the filesystem. Fixes: da4458bda237 ("NOMMU: Make it possible for RomFS to use MTD devices directly") Signed-off-by: Jann Horn Signed-off-by: Andrew Morton Reviewed-by: Greg Kroah-Hartman Cc: David Howells Cc: Link: http://lkml.kernel.org/r/20200818013202.2246365-1-jannh@google.com Signed-off-by: Linus Torvalds --- fs/romfs/storage.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c index 6b2b4362089e..b57b3ffcbc32 100644 --- a/fs/romfs/storage.c +++ b/fs/romfs/storage.c @@ -217,10 +217,8 @@ int romfs_dev_read(struct super_block *sb, unsigned long pos, size_t limit; limit = romfs_maxsize(sb); - if (pos >= limit) + if (pos >= limit || buflen > limit - pos) return -EIO; - if (buflen > limit - pos) - buflen = limit - pos; #ifdef CONFIG_ROMFS_ON_MTD if (sb->s_mtd) From 71e843295c680898959b22dc877ae3839cc22470 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 20 Aug 2020 17:42:14 -0700 Subject: [PATCH 274/290] kernel/relay.c: fix memleak on destroy relay channel kmemleak report memory leak as follows: unreferenced object 0x607ee4e5f948 (size 8): comm "syz-executor.1", pid 2098, jiffies 4295031601 (age 288.468s) hex dump (first 8 bytes): 00 00 00 00 00 00 00 00 ........ backtrace: relay_open kernel/relay.c:583 [inline] relay_open+0xb6/0x970 kernel/relay.c:563 do_blk_trace_setup+0x4a8/0xb20 kernel/trace/blktrace.c:557 __blk_trace_setup+0xb6/0x150 kernel/trace/blktrace.c:597 blk_trace_ioctl+0x146/0x280 kernel/trace/blktrace.c:738 blkdev_ioctl+0xb2/0x6a0 block/ioctl.c:613 block_ioctl+0xe5/0x120 fs/block_dev.c:1871 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x170/0x1ce fs/ioctl.c:739 do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 'chan->buf' is malloced in relay_open() by alloc_percpu() but not free while destroy the relay channel. Fix it by adding free_percpu() before return from relay_destroy_channel(). Fixes: 017c59c042d0 ("relay: Use per CPU constructs for the relay channel buffer pointers") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Andrew Morton Reviewed-by: Chris Wilson Cc: Al Viro Cc: Michael Ellerman Cc: David Rientjes Cc: Michel Lespinasse Cc: Daniel Axtens Cc: Thomas Gleixner Cc: Akash Goel Cc: Link: http://lkml.kernel.org/r/20200817122826.48518-1-weiyongjun1@huawei.com Signed-off-by: Linus Torvalds --- kernel/relay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/relay.c b/kernel/relay.c index 72fe443ea78f..fb4e0c530c08 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -197,6 +197,7 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan) static void relay_destroy_channel(struct kref *kref) { struct rchan *chan = container_of(kref, struct rchan, kref); + free_percpu(chan->buf); kfree(chan); } From c17c3dc9d08b9aad9a55a1e53f205187972f448e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 20 Aug 2020 17:42:17 -0700 Subject: [PATCH 275/290] uprobes: __replace_page() avoid BUG in munlock_vma_page() syzbot crashed on the VM_BUG_ON_PAGE(PageTail) in munlock_vma_page(), when called from uprobes __replace_page(). Which of many ways to fix it? Settled on not calling when PageCompound (since Head and Tail are equals in this context, PageCompound the usual check in uprobes.c, and the prior use of FOLL_SPLIT_PMD will have cleared PageMlocked already). Fixes: 5a52c9df62b4 ("uprobe: use FOLL_SPLIT_PMD instead of FOLL_SPLIT") Reported-by: syzbot Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Reviewed-by: Srikar Dronamraju Acked-by: Song Liu Acked-by: Oleg Nesterov Cc: "Kirill A. Shutemov" Cc: [5.4+] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008161338360.20413@eggly.anvils Signed-off-by: Linus Torvalds --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 649fd53dc9ad..0e18aaf23a7b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -205,7 +205,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, try_to_free_swap(old_page); page_vma_mapped_walk_done(&pvmw); - if (vma->vm_flags & VM_LOCKED) + if ((vma->vm_flags & VM_LOCKED) && !PageCompound(old_page)) munlock_vma_page(old_page); put_page(old_page); From f26044c83e6e473a61917f5db411d1417327d425 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 20 Aug 2020 17:42:21 -0700 Subject: [PATCH 276/290] squashfs: avoid bio_alloc() failure with 1Mbyte blocks This is a regression introduced by the patch "migrate from ll_rw_block usage to BIO". Bio_alloc() is limited to 256 pages (1 Mbyte). This can cause a failure when reading 1 Mbyte block filesystems. The problem is a datablock can be fully (or almost uncompressed), requiring 256 pages, but, because blocks are not aligned to page boundaries, it may require 257 pages to read. Bio_kmalloc() can handle 1024 pages, and so use this for the edge condition. Fixes: 93e72b3c612a ("squashfs: migrate from ll_rw_block usage to BIO") Reported-by: Nicolas Prochazka Reported-by: Tomoatsu Shimada Signed-off-by: Phillip Lougher Signed-off-by: Andrew Morton Reviewed-by: Guenter Roeck Cc: Philippe Liard Cc: Christoph Hellwig Cc: Adrien Schildknecht Cc: Daniel Rosenberg Cc: Link: http://lkml.kernel.org/r/20200815035637.15319-1-phillip@squashfs.org.uk Signed-off-by: Linus Torvalds --- fs/squashfs/block.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 76bb1c846845..8a19773b5a0b 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -87,7 +87,11 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length, int error, i; struct bio *bio; - bio = bio_alloc(GFP_NOIO, page_count); + if (page_count <= BIO_MAX_PAGES) + bio = bio_alloc(GFP_NOIO, page_count); + else + bio = bio_kmalloc(GFP_NOIO, page_count); + if (!bio) return -ENOMEM; From e08d3fdfe2dafa0331843f70ce1ff6c1c4900bf4 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 20 Aug 2020 17:42:24 -0700 Subject: [PATCH 277/290] mm: include CMA pages in lowmem_reserve at boot The lowmem_reserve arrays provide a means of applying pressure against allocations from lower zones that were targeted at higher zones. Its values are a function of the number of pages managed by higher zones and are assigned by a call to the setup_per_zone_lowmem_reserve() function. The function is initially called at boot time by the function init_per_zone_wmark_min() and may be called later by accesses of the /proc/sys/vm/lowmem_reserve_ratio sysctl file. The function init_per_zone_wmark_min() was moved up from a module_init to a core_initcall to resolve a sequencing issue with khugepaged. Unfortunately this created a sequencing issue with CMA page accounting. The CMA pages are added to the managed page count of a zone when cma_init_reserved_areas() is called at boot also as a core_initcall. This makes it uncertain whether the CMA pages will be added to the managed page counts of their zones before or after the call to init_per_zone_wmark_min() as it becomes dependent on link order. With the current link order the pages are added to the managed count after the lowmem_reserve arrays are initialized at boot. This means the lowmem_reserve values at boot may be lower than the values used later if /proc/sys/vm/lowmem_reserve_ratio is accessed even if the ratio values are unchanged. In many cases the difference is not significant, but for example an ARM platform with 1GB of memory and the following memory layout cma: Reserved 256 MiB at 0x0000000030000000 Zone ranges: DMA [mem 0x0000000000000000-0x000000002fffffff] Normal empty HighMem [mem 0x0000000030000000-0x000000003fffffff] would result in 0 lowmem_reserve for the DMA zone. This would allow userspace to deplete the DMA zone easily. Funnily enough $ cat /proc/sys/vm/lowmem_reserve_ratio would fix up the situation because as a side effect it forces setup_per_zone_lowmem_reserve. This commit breaks the link order dependency by invoking init_per_zone_wmark_min() as a postcore_initcall so that the CMA pages have the chance to be properly accounted in their zone(s) and allowing the lowmem_reserve arrays to receive consistent values. Fixes: bc22af74f271 ("mm: update min_free_kbytes from khugepaged after core initialization") Signed-off-by: Doug Berger Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Jason Baron Cc: David Rientjes Cc: "Kirill A. Shutemov" Cc: Link: http://lkml.kernel.org/r/1597423766-27849-1-git-send-email-opendmb@gmail.com Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0e2bab486fea..f4b3b91bbe7f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7888,7 +7888,7 @@ int __meminit init_per_zone_wmark_min(void) return 0; } -core_initcall(init_per_zone_wmark_min) +postcore_initcall(init_per_zone_wmark_min) /* * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so From 88e8ac11d2ea3acc003cf01bb5a38c8aa76c3cfd Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Thu, 20 Aug 2020 17:42:27 -0700 Subject: [PATCH 278/290] mm, page_alloc: fix core hung in free_pcppages_bulk() The following race is observed with the repeated online, offline and a delay between two successive online of memory blocks of movable zone. P1 P2 Online the first memory block in the movable zone. The pcp struct values are initialized to default values,i.e., pcp->high = 0 & pcp->batch = 1. Allocate the pages from the movable zone. Try to Online the second memory block in the movable zone thus it entered the online_pages() but yet to call zone_pcp_update(). This process is entered into the exit path thus it tries to release the order-0 pages to pcp lists through free_unref_page_commit(). As pcp->high = 0, pcp->count = 1 proceed to call the function free_pcppages_bulk(). Update the pcp values thus the new pcp values are like, say, pcp->high = 378, pcp->batch = 63. Read the pcp's batch value using READ_ONCE() and pass the same to free_pcppages_bulk(), pcp values passed here are, batch = 63, count = 1. Since num of pages in the pcp lists are less than ->batch, then it will stuck in while(list_empty(list)) loop with interrupts disabled thus a core hung. Avoid this by ensuring free_pcppages_bulk() is called with proper count of pcp list pages. The mentioned race is some what easily reproducible without [1] because pcp's are not updated for the first memory block online and thus there is a enough race window for P2 between alloc+free and pcp struct values update through onlining of second memory block. With [1], the race still exists but it is very narrow as we update the pcp struct values for the first memory block online itself. This is not limited to the movable zone, it could also happen in cases with the normal zone (e.g., hotplug to a node that only has DMA memory, or no other memory yet). [1]: https://patchwork.kernel.org/patch/11696389/ Fixes: 5f8dcc21211a ("page-allocator: split per-cpu list into one-list-per-migrate-type") Signed-off-by: Charan Teja Reddy Signed-off-by: Andrew Morton Acked-by: David Hildenbrand Acked-by: David Rientjes Acked-by: Michal Hocko Cc: Michal Hocko Cc: Vlastimil Babka Cc: Vinayak Menon Cc: [2.6+] Link: http://lkml.kernel.org/r/1597150703-19003-1-git-send-email-charante@codeaurora.org Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f4b3b91bbe7f..fab5e97dc9ca 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1302,6 +1302,11 @@ static void free_pcppages_bulk(struct zone *zone, int count, struct page *page, *tmp; LIST_HEAD(head); + /* + * Ensure proper count is passed which otherwise would stuck in the + * below while (list_empty(list)) loop. + */ + count = min(pcp->count, count); while (count) { struct list_head *list; From 5e0b17b026eb7c6de9baa9b0d45a51b05f05abe1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 Aug 2020 10:15:12 +0100 Subject: [PATCH 279/290] afs: Fix NULL deref in afs_dynroot_depopulate() If an error occurs during the construction of an afs superblock, it's possible that an error occurs after a superblock is created, but before we've created the root dentry. If the superblock has a dynamic root (ie. what's normally mounted on /afs), the afs_kill_super() will call afs_dynroot_depopulate() to unpin any created dentries - but this will oops if the root hasn't been created yet. Fix this by skipping that bit of code if there is no root dentry. This leads to an oops looking like: general protection fault, ... KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f] ... RIP: 0010:afs_dynroot_depopulate+0x25f/0x529 fs/afs/dynroot.c:385 ... Call Trace: afs_kill_super+0x13b/0x180 fs/afs/super.c:535 deactivate_locked_super+0x94/0x160 fs/super.c:335 afs_get_tree+0x1124/0x1460 fs/afs/super.c:598 vfs_get_tree+0x89/0x2f0 fs/super.c:1547 do_new_mount fs/namespace.c:2875 [inline] path_mount+0x1387/0x2070 fs/namespace.c:3192 do_mount fs/namespace.c:3205 [inline] __do_sys_mount fs/namespace.c:3413 [inline] __se_sys_mount fs/namespace.c:3390 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3390 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 which is oopsing on this line: inode_lock(root->d_inode); presumably because sb->s_root was NULL. Fixes: 0da0b7fd73e4 ("afs: Display manually added cells in dynamic root mount") Reported-by: syzbot+c1eff8205244ae7e11a6@syzkaller.appspotmail.com Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/dynroot.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index b79879aacc02..7b784af604fd 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -382,15 +382,17 @@ void afs_dynroot_depopulate(struct super_block *sb) net->dynroot_sb = NULL; mutex_unlock(&net->proc_cells_lock); - inode_lock(root->d_inode); + if (root) { + inode_lock(root->d_inode); - /* Remove all the pins for dirs created for manually added cells */ - list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) { - if (subdir->d_fsdata) { - subdir->d_fsdata = NULL; - dput(subdir); + /* Remove all the pins for dirs created for manually added cells */ + list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) { + if (subdir->d_fsdata) { + subdir->d_fsdata = NULL; + dput(subdir); + } } - } - inode_unlock(root->d_inode); + inode_unlock(root->d_inode); + } } From 8d75785a814241587802655cc33e384230744f0c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 17 Aug 2020 18:49:50 -0700 Subject: [PATCH 280/290] ARM64: vdso32: Install vdso32 from vdso_install Add the 32-bit vdso Makefile to the vdso_install rule so that 'make vdso_install' installs the 32-bit compat vdso when it is compiled. Fixes: a7f71a2c8903 ("arm64: compat: Add vDSO") Signed-off-by: Stephen Boyd Reviewed-by: Vincenzo Frascino Acked-by: Will Deacon Cc: Vincenzo Frascino Link: https://lore.kernel.org/r/20200818014950.42492-1-swboyd@chromium.org Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 1 + arch/arm64/kernel/vdso32/Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 55bc8546d9c7..b45f0124cc16 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -165,6 +165,7 @@ zinstall install: PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@ # We use MRPROPER_FILES and CLEAN_FILES now archclean: diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 5139a5f19256..d6adb4677c25 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -208,7 +208,7 @@ quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ # Install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ +quiet_cmd_vdso_install = INSTALL32 $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so vdso.so: $(obj)/vdso.so.dbg From 774d977abfd024e6f73484544b9abe5a5cd62de7 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 21 Aug 2020 06:56:00 -0700 Subject: [PATCH 281/290] net: dsa: b53: check for timeout clang static analysis reports this problem b53_common.c:1583:13: warning: The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage ent.port &= ~BIT(port); ~~~~~~~~ ^ ent is set by a successful call to b53_arl_read(). Unsuccessful calls are caught by an switch statement handling specific returns. b32_arl_read() calls b53_arl_op_wait() which fails with the unhandled -ETIMEDOUT. So add -ETIMEDOUT to the switch statement. Because b53_arl_op_wait() already prints out a message, do not add another one. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Tom Rix Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 6500179c2ca2..0837ae0e0c5e 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1554,6 +1554,8 @@ static int b53_arl_op(struct b53_device *dev, int op, int port, return ret; switch (ret) { + case -ETIMEDOUT: + return ret; case -ENOSPC: dev_dbg(dev->dev, "{%pM,%.4d} no space left in ARL\n", addr, vid); From b16fc097bc283184cde40e5b30d15705e1590410 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Aug 2020 15:36:42 +0200 Subject: [PATCH 282/290] bpf: Fix two typos in uapi/linux/bpf.h Also remove trailing whitespaces in bpf_skb_get_tunnel_key example code. Signed-off-by: Tobias Klauser Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821133642.18870-1-tklauser@distanz.ch --- include/uapi/linux/bpf.h | 10 +++++----- tools/include/uapi/linux/bpf.h | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0480f893facd..b6238b2209b7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -767,7 +767,7 @@ union bpf_attr { * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and + * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values @@ -1033,14 +1033,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet - * + * * if (key.remote_ipv4 != 0x0a000001) * return TC_ACT_SHOT; // drop packet - * + * * return TC_ACT_OK; // accept packet * * This interface can also be used with all encapsulation devices @@ -1147,7 +1147,7 @@ union bpf_attr { * Description * Retrieve the realm or the route, that is to say the * **tclassid** field of the destination for the *skb*. The - * indentifier retrieved is a user-provided tag, similar to the + * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0480f893facd..b6238b2209b7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -767,7 +767,7 @@ union bpf_attr { * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and + * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values @@ -1033,14 +1033,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet - * + * * if (key.remote_ipv4 != 0x0a000001) * return TC_ACT_SHOT; // drop packet - * + * * return TC_ACT_OK; // accept packet * * This interface can also be used with all encapsulation devices @@ -1147,7 +1147,7 @@ union bpf_attr { * Description * Retrieve the realm or the route, that is to say the * **tclassid** field of the destination for the *skb*. The - * indentifier retrieved is a user-provided tag, similar to the + * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. From a326462cba6ae7398a5c997dddf3bea39555a825 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 19 Aug 2020 11:58:16 -0600 Subject: [PATCH 283/290] dt-bindings: PCI: intel,lgm-pcie: Fix matching on all snps,dw-pcie instances The intel,lgm-pcie binding is matching on all snps,dw-pcie instances which is wrong. Add a custom 'select' entry to fix this. Fixes: e54ea45a4955 ("dt-bindings: PCI: intel: Add YAML schemas for the PCIe RC controller") Cc: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Reviewed-by: Dilip Kota Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml b/Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml index 64b2c64ca806..a1e2be737eec 100644 --- a/Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml @@ -9,6 +9,14 @@ title: PCIe RC controller on Intel Gateway SoCs maintainers: - Dilip Kota +select: + properties: + compatible: + contains: + const: intel,lgm-pcie + required: + - compatible + properties: compatible: items: From 5f53584ce9c0cd9c92fb04717bb45bcd5719feaa Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Thu, 20 Aug 2020 13:02:04 +0300 Subject: [PATCH 284/290] dt-bindings: net: correct description of phy-connection-type The phy-connection-type parameter is described in ePAPR 1.1: Specifies interface type between the Ethernet device and a physical layer (PHY) device. The value of this property is specific to the implementation. Signed-off-by: Madalin Bucur Link: https://lore.kernel.org/r/1597917724-11127-1-git-send-email-madalin.bucur@oss.nxp.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/ethernet-controller.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 1c4474036d46..fa2baca8c726 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -54,7 +54,8 @@ properties: phy-connection-type: description: - Operation mode of the PHY interface + Specifies interface type between the Ethernet device and a physical + layer (PHY) device. enum: # There is not a standard bus between the MAC and the PHY, # something proprietary is being used to embed the PHY in the From fdfe7cbd58806522e799e2a50a15aee7f2cbb7b6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 11 Aug 2020 11:27:24 +0100 Subject: [PATCH 285/290] KVM: Pass MMU notifier range flags to kvm_unmap_hva_range() The 'flags' field of 'struct mmu_notifier_range' is used to indicate whether invalidate_range_{start,end}() are permitted to block. In the case of kvm_mmu_notifier_invalidate_range_start(), this field is not forwarded on to the architecture-specific implementation of kvm_unmap_hva_range() and therefore the backend cannot sensibly decide whether or not to block. Add an extra 'flags' parameter to kvm_unmap_hva_range() so that architectures are aware as to whether or not they are permitted to block. Cc: Cc: Marc Zyngier Cc: Suzuki K Poulose Cc: James Morse Signed-off-by: Will Deacon Message-Id: <20200811102725.7121-2-will@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/mmu.c | 2 +- arch/mips/include/asm/kvm_host.h | 2 +- arch/mips/kvm/mmu.c | 3 ++- arch/powerpc/include/asm/kvm_host.h | 3 ++- arch/powerpc/kvm/book3s.c | 3 ++- arch/powerpc/kvm/e500_mmu_host.c | 3 ++- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/mmu/mmu.c | 3 ++- virt/kvm/kvm_main.c | 3 ++- 10 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 65568b23868a..e52c927aade5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -473,7 +473,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, unsigned flags); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0121ef2c7c8d..dc351802ff18 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2213,7 +2213,7 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *dat } int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, unsigned flags) { if (!kvm->arch.mmu.pgd) return 0; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index d35eaed1668f..825d337a505a 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -969,7 +969,7 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, unsigned flags); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 87fa8d8a1031..28c366d307e7 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -486,7 +486,8 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, return 1; } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e020d269416d..10ded83414de 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -58,7 +58,8 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER extern int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, + unsigned flags); extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 41fedec69ac3..49db50d1db04 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -834,7 +834,8 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change); } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index d6c1069e9954..ed0c9c43d0cf 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -734,7 +734,8 @@ static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) return 0; } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { /* kvm_unmap_hva flushes everything anyways */ kvm_unmap_hva(kvm, start); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5ab3af7275d8..5303dbc5c9bc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1596,7 +1596,8 @@ asmlinkage void kvm_spurious_fault(void); _ASM_EXTABLE(666b, 667b) #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4e03841f053d..a5d0207e7189 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1916,7 +1916,8 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2c2c0254c2d8..4eaa4e46c7d0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -482,7 +482,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * count is also read inside the mmu_lock critical section. */ kvm->mmu_notifier_count++; - need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end); + need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end, + range->flags); need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 11 Aug 2020 11:27:25 +0100 Subject: [PATCH 286/290] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is not set When an MMU notifier call results in unmapping a range that spans multiple PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary, since this avoids running into RCU stalls during VM teardown. Unfortunately, if the VM is destroyed as a result of OOM, then blocking is not permitted and the call to the scheduler triggers the following BUG(): | BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394 | in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper | INFO: lockdep is turned off. | CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1 | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 | Call trace: | dump_backtrace+0x0/0x284 | show_stack+0x1c/0x28 | dump_stack+0xf0/0x1a4 | ___might_sleep+0x2bc/0x2cc | unmap_stage2_range+0x160/0x1ac | kvm_unmap_hva_range+0x1a0/0x1c8 | kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8 | __mmu_notifier_invalidate_range_start+0x218/0x31c | mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0 | __oom_reap_task_mm+0x128/0x268 | oom_reap_task+0xac/0x298 | oom_reaper+0x178/0x17c | kthread+0x1e4/0x1fc | ret_from_fork+0x10/0x30 Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier flags. Cc: Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd") Cc: Marc Zyngier Cc: Suzuki K Poulose Cc: James Morse Signed-off-by: Will Deacon Message-Id: <20200811102725.7121-3-will@kernel.org> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/mmu.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index dc351802ff18..ba00bcc0c884 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, * destroying the VM), otherwise another faulting VCPU may come in and mess * with things behind our backs. */ -static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) +static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size, + bool may_block) { struct kvm *kvm = mmu->kvm; pgd_t *pgd; @@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si * If the range is too large, release the kvm->mmu_lock * to prevent starvation and lockup detector warnings. */ - if (next != end) + if (may_block && next != end) cond_resched_lock(&kvm->mmu_lock); } while (pgd++, addr = next, addr != end); } +static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) +{ + __unmap_stage2_range(mmu, start, size, true); +} + static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { @@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm, static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { - unmap_stage2_range(&kvm->arch.mmu, gpa, size); + unsigned flags = *(unsigned *)data; + bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE; + + __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block); return 0; } @@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, return 0; trace_kvm_unmap_hva_range(start, end); - handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); + handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags); return 0; } From 5cd841d2676a702e5f79a8bacbfbae3bfc2411f7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Aug 2020 11:20:58 +0200 Subject: [PATCH 287/290] dt-bindings: vendor-prefixes: Remove trailing whitespace Fixes: f516fb704d02fff2 ("dt-bindings: Whitespace clean-ups in schema files") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200819092058.1526-1-geert+renesas@glider.be Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 2baee2c817c1..63996ab03521 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -993,7 +993,7 @@ patternProperties: "^sst,.*": description: Silicon Storage Technology, Inc. "^sstar,.*": - description: Xiamen Xingchen(SigmaStar) Technology Co., Ltd. + description: Xiamen Xingchen(SigmaStar) Technology Co., Ltd. (formerly part of MStar Semiconductor, Inc.) "^st,.*": description: STMicroelectronics From eeaac3634ee0e3f35548be35275efeca888e9b23 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 22 Aug 2020 15:06:36 +0300 Subject: [PATCH 288/290] net: nexthop: don't allow empty NHA_GROUP Currently the nexthop code will use an empty NHA_GROUP attribute, but it requires at least 1 entry in order to function properly. Otherwise we end up derefencing null or random pointers all over the place due to not having any nh_grp_entry members allocated, nexthop code relies on having at least the first member present. Empty NHA_GROUP doesn't make any sense so just disallow it. Also add a WARN_ON for any future users of nexthop_create_group(). BUG: kernel NULL pointer dereference, address: 0000000000000080 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 0 PID: 558 Comm: ip Not tainted 5.9.0-rc1+ #93 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 RIP: 0010:fib_check_nexthop+0x4a/0xaa Code: 0f 84 83 00 00 00 48 c7 02 80 03 f7 81 c3 40 80 fe fe 75 12 b8 ea ff ff ff 48 85 d2 74 6b 48 c7 02 40 03 f7 81 c3 48 8b 40 10 <48> 8b 80 80 00 00 00 eb 36 80 78 1a 00 74 12 b8 ea ff ff ff 48 85 RSP: 0018:ffff88807983ba00 EFLAGS: 00010213 RAX: 0000000000000000 RBX: ffff88807983bc00 RCX: 0000000000000000 RDX: ffff88807983bc00 RSI: 0000000000000000 RDI: ffff88807bdd0a80 RBP: ffff88807983baf8 R08: 0000000000000dc0 R09: 000000000000040a R10: 0000000000000000 R11: ffff88807bdd0ae8 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88807bea3100 R15: 0000000000000001 FS: 00007f10db393700(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000080 CR3: 000000007bd0f004 CR4: 00000000003706f0 Call Trace: fib_create_info+0x64d/0xaf7 fib_table_insert+0xf6/0x581 ? __vma_adjust+0x3b6/0x4d4 inet_rtm_newroute+0x56/0x70 rtnetlink_rcv_msg+0x1e3/0x20d ? rtnl_calcit.isra.0+0xb8/0xb8 netlink_rcv_skb+0x5b/0xac netlink_unicast+0xfa/0x17b netlink_sendmsg+0x334/0x353 sock_sendmsg_nosec+0xf/0x3f ____sys_sendmsg+0x1a0/0x1fc ? copy_msghdr_from_user+0x4c/0x61 ___sys_sendmsg+0x63/0x84 ? handle_mm_fault+0xa39/0x11b5 ? sockfd_lookup_light+0x72/0x9a __sys_sendmsg+0x50/0x6e do_syscall_64+0x54/0xbe entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f10dacc0bb7 Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb cd 66 0f 1f 44 00 00 8b 05 9a 4b 2b 00 85 c0 75 2e 48 63 ff 48 63 d2 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 b1 f2 2a 00 f7 d8 64 89 02 48 RSP: 002b:00007ffcbe628bf8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007ffcbe628f80 RCX: 00007f10dacc0bb7 RDX: 0000000000000000 RSI: 00007ffcbe628c60 RDI: 0000000000000003 RBP: 000000005f41099c R08: 0000000000000001 R09: 0000000000000008 R10: 00000000000005e9 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007ffcbe628d70 R15: 0000563a86c6e440 Modules linked in: CR2: 0000000000000080 CC: David Ahern Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Reported-by: syzbot+a61aa19b0c14c8770bd9@syzkaller.appspotmail.com Signed-off-by: Nikolay Aleksandrov Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/nexthop.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index cc8049b100b2..134e92382275 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -446,7 +446,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], unsigned int i, j; u8 nhg_fdb = 0; - if (len & (sizeof(struct nexthop_grp) - 1)) { + if (!len || len & (sizeof(struct nexthop_grp) - 1)) { NL_SET_ERR_MSG(extack, "Invalid length for nexthop group attribute"); return -EINVAL; @@ -1187,6 +1187,9 @@ static struct nexthop *nexthop_create_group(struct net *net, struct nexthop *nh; int i; + if (WARN_ON(!num_nh)) + return ERR_PTR(-EINVAL); + nh = nexthop_alloc(); if (!nh) return ERR_PTR(-ENOMEM); From a9ed4a6560b8562b7e2e2bed9527e88001f7b682 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 19 Aug 2020 17:12:17 +0100 Subject: [PATCH 289/290] epoll: Keep a reference on files added to the check list When adding a new fd to an epoll, and that this new fd is an epoll fd itself, we recursively scan the fds attached to it to detect cycles, and add non-epool files to a "check list" that gets subsequently parsed. However, this check list isn't completely safe when deletions can happen concurrently. To sidestep the issue, make sure that a struct file placed on the check list sees its f_count increased, ensuring that a concurrent deletion won't result in the file disapearing from under our feet. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Al Viro --- fs/eventpoll.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 12eebcdea9c8..196003d9242c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1994,9 +1994,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) * not already there, and calling reverse_path_check() * during ep_insert(). */ - if (list_empty(&epi->ffd.file->f_tfile_llink)) + if (list_empty(&epi->ffd.file->f_tfile_llink)) { + get_file(epi->ffd.file); list_add(&epi->ffd.file->f_tfile_llink, &tfile_check_list); + } } } mutex_unlock(&ep->mtx); @@ -2040,6 +2042,7 @@ static void clear_tfile_check_list(void) file = list_first_entry(&tfile_check_list, struct file, f_tfile_llink); list_del_init(&file->f_tfile_llink); + fput(file); } INIT_LIST_HEAD(&tfile_check_list); } @@ -2204,13 +2207,17 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, clear_tfile_check_list(); goto error_tgt_fput; } - } else + } else { + get_file(tf.file); list_add(&tf.file->f_tfile_llink, &tfile_check_list); + } error = epoll_mutex_lock(&ep->mtx, 0, nonblock); if (error) { out_del: list_del(&tf.file->f_tfile_llink); + if (!is_file_epoll(tf.file)) + fput(tf.file); goto error_tgt_fput; } if (is_file_epoll(tf.file)) { From 52c479697c9b73f628140dcdfcd39ea302d05482 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Aug 2020 18:25:52 -0400 Subject: [PATCH 290/290] do_epoll_ctl(): clean the failure exits up a bit Signed-off-by: Al Viro --- fs/eventpoll.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 196003d9242c..e0decff22ae2 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2203,29 +2203,22 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, full_check = 1; if (is_file_epoll(tf.file)) { error = -ELOOP; - if (ep_loop_check(ep, tf.file) != 0) { - clear_tfile_check_list(); + if (ep_loop_check(ep, tf.file) != 0) goto error_tgt_fput; - } } else { get_file(tf.file); list_add(&tf.file->f_tfile_llink, &tfile_check_list); } error = epoll_mutex_lock(&ep->mtx, 0, nonblock); - if (error) { -out_del: - list_del(&tf.file->f_tfile_llink); - if (!is_file_epoll(tf.file)) - fput(tf.file); + if (error) goto error_tgt_fput; - } if (is_file_epoll(tf.file)) { tep = tf.file->private_data; error = epoll_mutex_lock(&tep->mtx, 1, nonblock); if (error) { mutex_unlock(&ep->mtx); - goto out_del; + goto error_tgt_fput; } } } @@ -2246,8 +2239,6 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error = ep_insert(ep, epds, tf.file, fd, full_check); } else error = -EEXIST; - if (full_check) - clear_tfile_check_list(); break; case EPOLL_CTL_DEL: if (epi) @@ -2270,8 +2261,10 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, mutex_unlock(&ep->mtx); error_tgt_fput: - if (full_check) + if (full_check) { + clear_tfile_check_list(); mutex_unlock(&epmutex); + } fdput(tf); error_fput: