From ce27046883806f77fe351036deb44b90e5803167 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Thu, 20 Apr 2023 14:27:59 -0700 Subject: [PATCH 001/187] usb: dwc3: gadget: Refactor EP0 forced stall/restart into a separate API [ Upstream commit 8f40fc0808137c157dd408d2632e63bfca2aecdb ] Several sequences utilize the same routine for forcing the control endpoint back into the SETUP phase. This is required, because those operations need to ensure that EP0 is back in the default state. Acked-by: Thinh Nguyen Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20230420212759.29429-3-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 3c7af52c7616 ("usb: dwc3: gadget: Queue PM runtime idle on disconnect event") Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 53 ++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 137602d9076f..339e8c3f7c50 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -139,6 +139,24 @@ int dwc3_gadget_set_link_state(struct dwc3 *dwc, enum dwc3_link_state state) return -ETIMEDOUT; } +static void dwc3_ep0_reset_state(struct dwc3 *dwc) +{ + unsigned int dir; + + if (dwc->ep0state != EP0_SETUP_PHASE) { + dir = !!dwc->ep0_expect_in; + if (dwc->ep0state == EP0_DATA_PHASE) + dwc3_ep0_end_control_data(dwc, dwc->eps[dir]); + else + dwc3_ep0_end_control_data(dwc, dwc->eps[!dir]); + + dwc->eps[0]->trb_enqueue = 0; + dwc->eps[1]->trb_enqueue = 0; + + dwc3_ep0_stall_and_restart(dwc); + } +} + /** * dwc3_ep_inc_trb - increment a trb index. * @index: Pointer to the TRB index to increment. @@ -2552,16 +2570,9 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) ret = wait_for_completion_timeout(&dwc->ep0_in_setup, msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT)); if (ret == 0) { - unsigned int dir; - dev_warn(dwc->dev, "wait for SETUP phase timed out\n"); spin_lock_irqsave(&dwc->lock, flags); - dir = !!dwc->ep0_expect_in; - if (dwc->ep0state == EP0_DATA_PHASE) - dwc3_ep0_end_control_data(dwc, dwc->eps[dir]); - else - dwc3_ep0_end_control_data(dwc, dwc->eps[!dir]); - dwc3_ep0_stall_and_restart(dwc); + dwc3_ep0_reset_state(dwc); spin_unlock_irqrestore(&dwc->lock, flags); } } @@ -3848,16 +3859,7 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc) dwc->setup_packet_pending = false; usb_gadget_set_state(dwc->gadget, USB_STATE_NOTATTACHED); - if (dwc->ep0state != EP0_SETUP_PHASE) { - unsigned int dir; - - dir = !!dwc->ep0_expect_in; - if (dwc->ep0state == EP0_DATA_PHASE) - dwc3_ep0_end_control_data(dwc, dwc->eps[dir]); - else - dwc3_ep0_end_control_data(dwc, dwc->eps[!dir]); - dwc3_ep0_stall_and_restart(dwc); - } + dwc3_ep0_reset_state(dwc); } static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) @@ -3913,20 +3915,7 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) * phase. So ensure that EP0 is in setup phase by issuing a stall * and restart if EP0 is not in setup phase. */ - if (dwc->ep0state != EP0_SETUP_PHASE) { - unsigned int dir; - - dir = !!dwc->ep0_expect_in; - if (dwc->ep0state == EP0_DATA_PHASE) - dwc3_ep0_end_control_data(dwc, dwc->eps[dir]); - else - dwc3_ep0_end_control_data(dwc, dwc->eps[!dir]); - - dwc->eps[0]->trb_enqueue = 0; - dwc->eps[1]->trb_enqueue = 0; - - dwc3_ep0_stall_and_restart(dwc); - } + dwc3_ep0_reset_state(dwc); /* * In the Synopsis DesignWare Cores USB3 Databook Rev. 3.30a From c8fe8ce07f1e5616ecb42027a09934748a99e51a Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 3 Jan 2024 13:49:46 -0800 Subject: [PATCH 002/187] usb: dwc3: gadget: Queue PM runtime idle on disconnect event [ Upstream commit 3c7af52c7616c3aa6dacd2336ec748d4a65df8f4 ] There is a scenario where DWC3 runtime suspend is blocked due to the dwc->connected flag still being true while PM usage_count is zero after DWC3 giveback is completed and the USB gadget session is being terminated. This leads to a case where nothing schedules a PM runtime idle for the device. The exact condition is seen with the following sequence: 1. USB bus reset is issued by the host 2. Shortly after, or concurrently, a USB PD DR SWAP request is received (sink->source) 3. USB bus reset event handler runs and issues dwc3_stop_active_transfers(), and pending transfer are stopped 4. DWC3 usage_count decremented to 0, and runtime idle occurs while dwc->connected == true, returns -EBUSY 5. DWC3 disconnect event seen, dwc->connected set to false due to DR swap handling 6. No runtime idle after this point Address this by issuing an asynchronous PM runtime idle call after the disconnect event is completed, as it modifies the dwc->connected flag, which is what blocks the initial runtime idle. Fixes: fc8bb91bc83e ("usb: dwc3: implement runtime PM") Cc: Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20240103214946.2596-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 339e8c3f7c50..5617a75b0d74 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3860,6 +3860,13 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc) usb_gadget_set_state(dwc->gadget, USB_STATE_NOTATTACHED); dwc3_ep0_reset_state(dwc); + + /* + * Request PM idle to address condition where usage count is + * already decremented to zero, but waiting for the disconnect + * interrupt to set dwc->connected to FALSE. + */ + pm_request_idle(dwc->dev); } static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) From c519a9054bf7026f308c6a09b29d02fb3a0f39d9 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Wed, 6 Dec 2023 12:18:14 -0800 Subject: [PATCH 003/187] usb: dwc3: gadget: Handle EP0 request dequeuing properly [ Upstream commit 730e12fbec53ab59dd807d981a204258a4cfb29a ] Current EP0 dequeue path will share the same as other EPs. However, there are some special considerations that need to be made for EP0 transfers: - EP0 transfers never transition into the started_list - EP0 only has one active request at a time In case there is a vendor specific control message for a function over USB FFS, then there is no guarantee on the timeline which the DATA/STATUS stage is responded to. While this occurs, any attempt to end transfers on non-control EPs will end up having the DWC3_EP_DELAY_STOP flag set, and defer issuing of the end transfer command. If the USB FFS application decides to timeout the control transfer, or if USB FFS AIO path exits, the USB FFS driver will issue a call to usb_ep_dequeue() for the ep0 request. In case of the AIO exit path, the AIO FS blocks until all pending USB requests utilizing the AIO path is completed. However, since the dequeue of ep0 req does not happen properly, all non-control EPs with the DWC3_EP_DELAY_STOP flag set will not be handled, and the AIO exit path will be stuck waiting for the USB FFS data endpoints to receive a completion callback. Fix is to utilize dwc3_ep0_reset_state() in the dequeue API to ensure EP0 is brought back to the SETUP state, and ensures that any deferred end transfer commands are handled. This also will end any active transfers on EP0, compared to the previous implementation which directly called giveback only. Fixes: fcd2def66392 ("usb: dwc3: gadget: Refactor dwc3_gadget_ep_dequeue") Cc: stable Signed-off-by: Wesley Cheng Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20231206201814.32664-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 5617a75b0d74..c4703f6b2089 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2093,7 +2093,17 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, list_for_each_entry(r, &dep->pending_list, list) { if (r == req) { - dwc3_gadget_giveback(dep, req, -ECONNRESET); + /* + * Explicitly check for EP0/1 as dequeue for those + * EPs need to be handled differently. Control EP + * only deals with one USB req, and giveback will + * occur during dwc3_ep0_stall_and_restart(). EP0 + * requests are never added to started_list. + */ + if (dep->number > 1) + dwc3_gadget_giveback(dep, req, -ECONNRESET); + else + dwc3_ep0_reset_state(dwc); goto out; } } From fafda9f08a7ce689e74206a4e2ff802dae7ac336 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Oct 2023 12:41:03 -0700 Subject: [PATCH 004/187] Revert "nSVM: Check for reserved encodings of TLB_CONTROL in nested VMCB" [ Upstream commit a484755ab2526ebdbe042397cdd6e427eb4b1a68 ] Revert KVM's made-up consistency check on SVM's TLB control. The APM says that unsupported encodings are reserved, but the APM doesn't state that VMRUN checks for a supported encoding. Unless something is called out in "Canonicalization and Consistency Checks" or listed as MBZ (Must Be Zero), AMD behavior is typically to let software shoot itself in the foot. This reverts commit 174a921b6975ef959dd82ee9e8844067a62e3ec1. Fixes: 174a921b6975 ("nSVM: Check for reserved encodings of TLB_CONTROL in nested VMCB") Reported-by: Stefan Sterz Closes: https://lkml.kernel.org/r/b9915c9c-4cf6-051a-2d91-44cc6380f455%40proxmox.com Cc: stable@vger.kernel.org Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20231018194104.1896415-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/nested.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index bc288e6bde64..5d4d78c9a787 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -239,18 +239,6 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size) kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1); } -static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl) -{ - /* Nested FLUSHBYASID is not supported yet. */ - switch(tlb_ctl) { - case TLB_CONTROL_DO_NOTHING: - case TLB_CONTROL_FLUSH_ALL_ASID: - return true; - default: - return false; - } -} - static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu, struct vmcb_ctrl_area_cached *control) { @@ -270,8 +258,6 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu, IOPM_SIZE))) return false; - if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl))) - return false; return true; } From 6f57121e9cbe762e421e4f20fb046406db6c2c61 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Sat, 16 Dec 2023 14:46:37 -0300 Subject: [PATCH 005/187] iio: adc: ad7091r: Set alert bit in config register [ Upstream commit 149694f5e79b0c7a36ceb76e7c0d590db8f151c1 ] The ad7091r-base driver sets up an interrupt handler for firing events when inputs are either above or below a certain threshold. However, for the interrupt signal to come from the device it must be configured to enable the ALERT/BUSY/GPO pin to be used as ALERT, which was not being done until now. Enable interrupt signals on the ALERT/BUSY/GPO pin by setting the proper bit in the configuration register. Signed-off-by: Marcelo Schmitt Link: https://lore.kernel.org/r/e8da2ee98d6df88318b14baf3dc9630e20218418.1702746240.git.marcelo.schmitt1@gmail.com Signed-off-by: Jonathan Cameron Stable-dep-of: 020e71c7ffc2 ("iio: adc: ad7091r: Allow users to configure device events") Signed-off-by: Sasha Levin --- drivers/iio/adc/ad7091r-base.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iio/adc/ad7091r-base.c b/drivers/iio/adc/ad7091r-base.c index 0e5d3d2e9c98..8aaa854f816f 100644 --- a/drivers/iio/adc/ad7091r-base.c +++ b/drivers/iio/adc/ad7091r-base.c @@ -28,6 +28,7 @@ #define AD7091R_REG_RESULT_CONV_RESULT(x) ((x) & 0xfff) /* AD7091R_REG_CONF */ +#define AD7091R_REG_CONF_ALERT_EN BIT(4) #define AD7091R_REG_CONF_AUTO BIT(8) #define AD7091R_REG_CONF_CMD BIT(10) @@ -232,6 +233,11 @@ int ad7091r_probe(struct device *dev, const char *name, iio_dev->channels = chip_info->channels; if (irq) { + ret = regmap_update_bits(st->map, AD7091R_REG_CONF, + AD7091R_REG_CONF_ALERT_EN, BIT(4)); + if (ret) + return ret; + ret = devm_request_threaded_irq(dev, irq, NULL, ad7091r_event_handler, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, name, iio_dev); From 137568aa540a9f587c48ff7d4c51cdba08cfe9a4 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Tue, 19 Dec 2023 17:26:01 -0300 Subject: [PATCH 006/187] iio: adc: ad7091r: Allow users to configure device events [ Upstream commit 020e71c7ffc25dfe29ed9be6c2d39af7bd7f661f ] AD7091R-5 devices are supported by the ad7091r-5 driver together with the ad7091r-base driver. Those drivers declared iio events for notifying user space when ADC readings fall bellow the thresholds of low limit registers or above the values set in high limit registers. However, to configure iio events and their thresholds, a set of callback functions must be implemented and those were not present until now. The consequence of trying to configure ad7091r-5 events without the proper callback functions was a null pointer dereference in the kernel because the pointers to the callback functions were not set. Implement event configuration callbacks allowing users to read/write event thresholds and enable/disable event generation. Since the event spec structs are generic to AD7091R devices, also move those from the ad7091r-5 driver the base driver so they can be reused when support for ad7091r-2/-4/-8 be added. Fixes: ca69300173b6 ("iio: adc: Add support for AD7091R5 ADC") Suggested-by: David Lechner Signed-off-by: Marcelo Schmitt Link: https://lore.kernel.org/r/59552d3548dabd56adc3107b7b4869afee2b0c3c.1703013352.git.marcelo.schmitt1@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/ad7091r-base.c | 156 +++++++++++++++++++++++++++++++++ drivers/iio/adc/ad7091r-base.h | 6 ++ drivers/iio/adc/ad7091r5.c | 28 +----- 3 files changed, 166 insertions(+), 24 deletions(-) diff --git a/drivers/iio/adc/ad7091r-base.c b/drivers/iio/adc/ad7091r-base.c index 8aaa854f816f..3d36bcd26b0c 100644 --- a/drivers/iio/adc/ad7091r-base.c +++ b/drivers/iio/adc/ad7091r-base.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -50,6 +51,27 @@ struct ad7091r_state { struct mutex lock; /*lock to prevent concurent reads */ }; +const struct iio_event_spec ad7091r_events[] = { + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_RISING, + .mask_separate = BIT(IIO_EV_INFO_VALUE) | + BIT(IIO_EV_INFO_ENABLE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_FALLING, + .mask_separate = BIT(IIO_EV_INFO_VALUE) | + BIT(IIO_EV_INFO_ENABLE), + }, + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_EITHER, + .mask_separate = BIT(IIO_EV_INFO_HYSTERESIS), + }, +}; +EXPORT_SYMBOL_NS_GPL(ad7091r_events, IIO_AD7091R); + static int ad7091r_set_mode(struct ad7091r_state *st, enum ad7091r_mode mode) { int ret, conf; @@ -169,8 +191,142 @@ static int ad7091r_read_raw(struct iio_dev *iio_dev, return ret; } +static int ad7091r_read_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + int val, ret; + + switch (dir) { + case IIO_EV_DIR_RISING: + ret = regmap_read(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + &val); + if (ret) + return ret; + return val != AD7091R_HIGH_LIMIT; + case IIO_EV_DIR_FALLING: + ret = regmap_read(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + &val); + if (ret) + return ret; + return val != AD7091R_LOW_LIMIT; + default: + return -EINVAL; + } +} + +static int ad7091r_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, int state) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + + if (state) { + return regmap_set_bits(st->map, AD7091R_REG_CONF, + AD7091R_REG_CONF_ALERT_EN); + } else { + /* + * Set thresholds either to 0 or to 2^12 - 1 as appropriate to + * prevent alerts and thus disable event generation. + */ + switch (dir) { + case IIO_EV_DIR_RISING: + return regmap_write(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + AD7091R_HIGH_LIMIT); + case IIO_EV_DIR_FALLING: + return regmap_write(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + AD7091R_LOW_LIMIT); + default: + return -EINVAL; + } + } +} + +static int ad7091r_read_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int *val, int *val2) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + int ret; + + switch (info) { + case IIO_EV_INFO_VALUE: + switch (dir) { + case IIO_EV_DIR_RISING: + ret = regmap_read(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + val); + if (ret) + return ret; + return IIO_VAL_INT; + case IIO_EV_DIR_FALLING: + ret = regmap_read(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + val); + if (ret) + return ret; + return IIO_VAL_INT; + default: + return -EINVAL; + } + case IIO_EV_INFO_HYSTERESIS: + ret = regmap_read(st->map, + AD7091R_REG_CH_HYSTERESIS(chan->channel), + val); + if (ret) + return ret; + return IIO_VAL_INT; + default: + return -EINVAL; + } +} + +static int ad7091r_write_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, int val, int val2) +{ + struct ad7091r_state *st = iio_priv(indio_dev); + + switch (info) { + case IIO_EV_INFO_VALUE: + switch (dir) { + case IIO_EV_DIR_RISING: + return regmap_write(st->map, + AD7091R_REG_CH_HIGH_LIMIT(chan->channel), + val); + case IIO_EV_DIR_FALLING: + return regmap_write(st->map, + AD7091R_REG_CH_LOW_LIMIT(chan->channel), + val); + default: + return -EINVAL; + } + case IIO_EV_INFO_HYSTERESIS: + return regmap_write(st->map, + AD7091R_REG_CH_HYSTERESIS(chan->channel), + val); + default: + return -EINVAL; + } +} + static const struct iio_info ad7091r_info = { .read_raw = ad7091r_read_raw, + .read_event_config = &ad7091r_read_event_config, + .write_event_config = &ad7091r_write_event_config, + .read_event_value = &ad7091r_read_event_value, + .write_event_value = &ad7091r_write_event_value, }; static irqreturn_t ad7091r_event_handler(int irq, void *private) diff --git a/drivers/iio/adc/ad7091r-base.h b/drivers/iio/adc/ad7091r-base.h index 509748aef9b1..7a78976a2f80 100644 --- a/drivers/iio/adc/ad7091r-base.h +++ b/drivers/iio/adc/ad7091r-base.h @@ -8,6 +8,10 @@ #ifndef __DRIVERS_IIO_ADC_AD7091R_BASE_H__ #define __DRIVERS_IIO_ADC_AD7091R_BASE_H__ +/* AD7091R_REG_CH_LIMIT */ +#define AD7091R_HIGH_LIMIT 0xFFF +#define AD7091R_LOW_LIMIT 0x0 + struct device; struct ad7091r_state; @@ -17,6 +21,8 @@ struct ad7091r_chip_info { unsigned int vref_mV; }; +extern const struct iio_event_spec ad7091r_events[3]; + extern const struct regmap_config ad7091r_regmap_config; int ad7091r_probe(struct device *dev, const char *name, diff --git a/drivers/iio/adc/ad7091r5.c b/drivers/iio/adc/ad7091r5.c index 47f5763023a4..12d475463945 100644 --- a/drivers/iio/adc/ad7091r5.c +++ b/drivers/iio/adc/ad7091r5.c @@ -12,26 +12,6 @@ #include "ad7091r-base.h" -static const struct iio_event_spec ad7091r5_events[] = { - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_RISING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), - }, - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_FALLING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), - }, - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_EITHER, - .mask_separate = BIT(IIO_EV_INFO_HYSTERESIS), - }, -}; - #define AD7091R_CHANNEL(idx, bits, ev, num_ev) { \ .type = IIO_VOLTAGE, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ @@ -44,10 +24,10 @@ static const struct iio_event_spec ad7091r5_events[] = { .scan_type.realbits = bits, \ } static const struct iio_chan_spec ad7091r5_channels_irq[] = { - AD7091R_CHANNEL(0, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), - AD7091R_CHANNEL(1, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), - AD7091R_CHANNEL(2, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), - AD7091R_CHANNEL(3, 12, ad7091r5_events, ARRAY_SIZE(ad7091r5_events)), + AD7091R_CHANNEL(0, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), + AD7091R_CHANNEL(1, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), + AD7091R_CHANNEL(2, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), + AD7091R_CHANNEL(3, 12, ad7091r_events, ARRAY_SIZE(ad7091r_events)), }; static const struct iio_chan_spec ad7091r5_channels_noirq[] = { From 852b6b2a2f7b7b2f8e62da77f207a389bf6f326b Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Wed, 13 Dec 2023 16:16:35 +1100 Subject: [PATCH 007/187] ext4: allow for the last group to be marked as trimmed commit 7c784d624819acbeefb0018bac89e632467cca5a upstream. The ext4 filesystem tracks the trim status of blocks at the group level. When an entire group has been trimmed then it is marked as such and subsequent trim invocations with the same minimum trim size will not be attempted on that group unless it is marked as able to be trimmed again such as when a block is freed. Currently the last group can't be marked as trimmed due to incorrect logic in ext4_last_grp_cluster(). ext4_last_grp_cluster() is supposed to return the zero based index of the last cluster in a group. This is then used by ext4_try_to_trim_range() to determine if the trim operation spans the entire group and as such if the trim status of the group should be recorded. ext4_last_grp_cluster() takes a 0 based group index, thus the valid values for grp are 0..(ext4_get_groups_count - 1). Any group index less than (ext4_get_groups_count - 1) is not the last group and must have EXT4_CLUSTERS_PER_GROUP(sb) clusters. For the last group we need to calculate the number of clusters based on the number of blocks in the group. Finally subtract 1 from the number of clusters as zero based indexing is expected. Rearrange the function slightly to make it clear what we are calculating and returning. Reproducer: // Create file system where the last group has fewer blocks than // blocks per group $ mkfs.ext4 -b 4096 -g 8192 /dev/nvme0n1 8191 $ mount /dev/nvme0n1 /mnt Before Patch: $ fstrim -v /mnt /mnt: 25.9 MiB (27156480 bytes) trimmed // Group not marked as trimmed so second invocation still discards blocks $ fstrim -v /mnt /mnt: 25.9 MiB (27156480 bytes) trimmed After Patch: fstrim -v /mnt /mnt: 25.9 MiB (27156480 bytes) trimmed // Group marked as trimmed so second invocation DOESN'T discard any blocks fstrim -v /mnt /mnt: 0 B (0 bytes) trimmed Fixes: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") Cc: # 4.19+ Signed-off-by: Suraj Jitindar Singh Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20231213051635.37731-1-surajjs@amazon.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a6e41746890d..33be702d6e38 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6421,11 +6421,16 @@ __acquires(bitlock) static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, ext4_group_t grp) { - if (grp < ext4_get_groups_count(sb)) - return EXT4_CLUSTERS_PER_GROUP(sb) - 1; - return (ext4_blocks_count(EXT4_SB(sb)->s_es) - - ext4_group_first_block_no(sb, grp) - 1) >> - EXT4_CLUSTER_BITS(sb); + unsigned long nr_clusters_in_group; + + if (grp < (ext4_get_groups_count(sb) - 1)) + nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb); + else + nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) - + ext4_group_first_block_no(sb, grp)) + >> EXT4_CLUSTER_BITS(sb); + + return nr_clusters_in_group - 1; } static bool ext4_trim_interrupted(void) From 4b5f8a187f10a6684bf38c49a73c7d0d6dffe299 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 14 Dec 2023 11:18:50 -0500 Subject: [PATCH 008/187] arm64: properly install vmlinuz.efi commit 7b21ed7d119dc06b0ed2ba3e406a02cafe3a8d03 upstream. If you select CONFIG_EFI_ZBOOT, we will generate vmlinuz.efi, and then when we go to install the kernel we'll install the vmlinux instead because install.sh only recognizes Image.gz as wanting the compressed install image. With CONFIG_EFI_ZBOOT we don't get the proper kernel installed, which means it doesn't boot, which makes for a very confused and subsequently angry kernel developer. Fix this by properly installing our compressed kernel if we've enabled CONFIG_EFI_ZBOOT. Signed-off-by: Josef Bacik Cc: # 6.1.x Fixes: c37b830fef13 ("arm64: efi: enable generic EFI compressed boot") Reviewed-by: Simon Glass Link: https://lore.kernel.org/r/6edb1402769c2c14c4fbef8f7eaedb3167558789.1702570674.git.josef@toxicpanda.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/install.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh index 7399d706967a..9b7a09808a3d 100755 --- a/arch/arm64/boot/install.sh +++ b/arch/arm64/boot/install.sh @@ -17,7 +17,8 @@ # $3 - kernel map file # $4 - default install path (blank if root directory) -if [ "$(basename $2)" = "Image.gz" ]; then +if [ "$(basename $2)" = "Image.gz" ] || [ "$(basename $2)" = "vmlinuz.efi" ] +then # Compressed install echo "Installing compressed kernel" base=vmlinuz From 29e2da3eabd8dd793b9c8aa0318acb97a41aa153 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 5 Jan 2024 13:55:37 +0530 Subject: [PATCH 009/187] OPP: Pass rounded rate to _set_opp() commit 7269c250db1b89cda72ca419b7bd5e37997309d6 upstream. The OPP core finds the eventual frequency to set with the help of clk_round_rate() and the same was earlier getting passed to _set_opp() and that's what would get configured. The commit 1efae8d2e777 ("OPP: Make dev_pm_opp_set_opp() independent of frequency") mistakenly changed that. Fix it. Fixes: 1efae8d2e777 ("OPP: Make dev_pm_opp_set_opp() independent of frequency") Cc: v5.18+ # v6.0+ Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/opp/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index f0d70ecc0271..71d3e3ba909a 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1226,12 +1226,12 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) * value of the frequency. In such a case, do not abort but * configure the hardware to the desired frequency forcefully. */ - forced = opp_table->rate_clk_single != target_freq; + forced = opp_table->rate_clk_single != freq; } - ret = _set_opp(dev, opp_table, opp, &target_freq, forced); + ret = _set_opp(dev, opp_table, opp, &freq, forced); - if (target_freq) + if (freq) dev_pm_opp_put(opp); put_opp_table: From 562850a008db621e3ce52447f936ac6911f79d48 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 8 Dec 2023 11:41:56 +1100 Subject: [PATCH 010/187] btrfs: sysfs: validate scrub_speed_max value commit 2b0122aaa800b021e36027d7f29e206f87c761d6 upstream. The value set as scrub_speed_max accepts size with suffixes (k/m/g/t/p/e) but we should still validate it for trailing characters, similar to what we do with chunk_size_store. CC: stable@vger.kernel.org # 5.15+ Signed-off-by: David Disseldorp Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 74fef1f49c35..fc468d1079c2 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1704,6 +1704,10 @@ static ssize_t btrfs_devinfo_scrub_speed_max_store(struct kobject *kobj, unsigned long long limit; limit = memparse(buf, &endptr); + /* There could be trailing '\n', also catch any typos after the value. */ + endptr = skip_spaces(endptr); + if (*endptr != 0) + return -EINVAL; WRITE_ONCE(device->scrub_speed_max, limit); return len; } From 680eb0a99336f7b21ff149bc57579d059421c5de Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 7 Dec 2023 18:36:57 +0800 Subject: [PATCH 011/187] crypto: api - Disallow identical driver names commit 27016f75f5ed47e2d8e0ca75a8ff1f40bc1a5e27 upstream. Disallow registration of two algorithms with identical driver names. Cc: Reported-by: Ovidiu Panait Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/algapi.c b/crypto/algapi.c index 5dc9ccdd5a51..c73d1359b9d4 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -290,6 +290,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) } if (!strcmp(q->cra_driver_name, alg->cra_name) || + !strcmp(q->cra_driver_name, alg->cra_driver_name) || !strcmp(q->cra_name, alg->cra_driver_name)) goto err; } From 3a081586c75398ba9813fb5c69ba55f9145fd6bb Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Thu, 16 Nov 2023 08:56:09 +0800 Subject: [PATCH 012/187] PM: hibernate: Enforce ordering during image compression/decompression commit 71cd7e80cfde548959952eac7063aeaea1f2e1c6 upstream. An S4 (suspend to disk) test on the LoongArch 3A6000 platform sometimes fails with the following error messaged in the dmesg log: Invalid LZO compressed length That happens because when compressing/decompressing the image, the synchronization between the control thread and the compress/decompress/crc thread is based on a relaxed ordering interface, which is unreliable, and the following situation may occur: CPU 0 CPU 1 save_image_lzo lzo_compress_threadfn atomic_set(&d->stop, 1); atomic_read(&data[thr].stop) data[thr].cmp = data[thr].cmp_len; WRITE data[thr].cmp_len Then CPU0 gets a stale cmp_len and writes it to disk. During resume from S4, wrong cmp_len is loaded. To maintain data consistency between the two threads, use the acquire/release variants of atomic set and read operations. Fixes: 081a9d043c98 ("PM / Hibernate: Improve performance of LZO/plain hibernation, checksum image") Cc: All applicable Signed-off-by: Hongchen Zhang Co-developed-by: Weihao Li Signed-off-by: Weihao Li [ rjw: Subject rewrite and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/swap.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index cc44c37699de..5ab54ab5ace7 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -605,11 +605,11 @@ static int crc32_threadfn(void *data) unsigned i; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -618,7 +618,7 @@ static int crc32_threadfn(void *data) for (i = 0; i < d->run_threads; i++) *d->crc32 = crc32_le(*d->crc32, d->unc[i], *d->unc_len[i]); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -648,12 +648,12 @@ static int lzo_compress_threadfn(void *data) struct cmp_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -662,7 +662,7 @@ static int lzo_compress_threadfn(void *data) d->ret = lzo1x_1_compress(d->unc, d->unc_len, d->cmp + LZO_HEADER, &d->cmp_len, d->wrk); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -797,7 +797,7 @@ static int save_image_lzo(struct swap_map_handle *handle, data[thr].unc_len = off; - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -805,12 +805,12 @@ static int save_image_lzo(struct swap_map_handle *handle, break; crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -849,7 +849,7 @@ static int save_image_lzo(struct swap_map_handle *handle, } } - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } @@ -1131,12 +1131,12 @@ static int lzo_decompress_threadfn(void *data) struct dec_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -1149,7 +1149,7 @@ static int lzo_decompress_threadfn(void *data) flush_icache_range((unsigned long)d->unc, (unsigned long)d->unc + d->unc_len); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -1334,7 +1334,7 @@ static int load_image_lzo(struct swap_map_handle *handle, } if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); crc->run_threads = 0; } @@ -1370,7 +1370,7 @@ static int load_image_lzo(struct swap_map_handle *handle, pg = 0; } - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -1389,7 +1389,7 @@ static int load_image_lzo(struct swap_map_handle *handle, for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -1420,7 +1420,7 @@ static int load_image_lzo(struct swap_map_handle *handle, ret = snapshot_write_next(snapshot); if (ret <= 0) { crc->run_threads = thr + 1; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); goto out_finish; } @@ -1428,13 +1428,13 @@ static int load_image_lzo(struct swap_map_handle *handle, } crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); } out_finish: if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } stop = ktime_get(); From aa8aa16ed9adf1df05bb339d588cf485a011839e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 2 Dec 2023 09:01:54 +0800 Subject: [PATCH 013/187] hwrng: core - Fix page fault dead lock on mmap-ed hwrng commit 78aafb3884f6bc6636efcc1760c891c8500b9922 upstream. There is a dead-lock in the hwrng device read path. This triggers when the user reads from /dev/hwrng into memory also mmap-ed from /dev/hwrng. The resulting page fault triggers a recursive read which then dead-locks. Fix this by using a stack buffer when calling copy_to_user. Reported-by: Edward Adam Davis Reported-by: syzbot+c52ab18308964d248092@syzkaller.appspotmail.com Fixes: 9996508b3353 ("hwrng: core - Replace u32 in driver API with byte array") Cc: Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/core.c | 36 +++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 8f31f9d81030..c319f9937709 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -24,10 +24,13 @@ #include #include #include +#include #include #define RNG_MODULE_NAME "hw_random" +#define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES) + static struct hwrng *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; @@ -59,7 +62,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static size_t rng_buffer_size(void) { - return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES; + return RNG_BUFFER_SIZE; } static void add_early_randomness(struct hwrng *rng) @@ -211,6 +214,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { + u8 buffer[RNG_BUFFER_SIZE]; ssize_t ret = 0; int err = 0; int bytes_read, len; @@ -238,34 +242,37 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading; - } - data_avail = bytes_read; - } - - if (!data_avail) { - if (filp->f_flags & O_NONBLOCK) { + } else if (bytes_read == 0 && + (filp->f_flags & O_NONBLOCK)) { err = -EAGAIN; goto out_unlock_reading; } - } else { - len = data_avail; + + data_avail = bytes_read; + } + + len = data_avail; + if (len) { if (len > size) len = size; data_avail -= len; - if (copy_to_user(buf + ret, rng_buffer + data_avail, - len)) { + memcpy(buffer, rng_buffer + data_avail, len); + } + mutex_unlock(&reading_mutex); + put_rng(rng); + + if (len) { + if (copy_to_user(buf + ret, buffer, len)) { err = -EFAULT; - goto out_unlock_reading; + goto out; } size -= len; ret += len; } - mutex_unlock(&reading_mutex); - put_rng(rng); if (need_resched()) schedule_timeout_interruptible(1); @@ -276,6 +283,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out: + memzero_explicit(buffer, sizeof(buffer)); return ret ? : err; out_unlock_reading: From dbc9a791a70ea47be9f2acf251700fe254a2ab23 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 28 Nov 2023 14:22:13 +0800 Subject: [PATCH 014/187] crypto: s390/aes - Fix buffer overread in CTR mode commit d07f951903fa9922c375b8ab1ce81b18a0034e3b upstream. When processing the last block, the s390 ctr code will always read a whole block, even if there isn't a whole block of data left. Fix this by using the actual length left and copy it into a buffer first for processing. Fixes: 0200f3ecc196 ("crypto: s390 - add System z hardware support for CTR mode") Cc: Reported-by: Guangwu Zhang Signed-off-by: Herbert Xu Reviewd-by: Harald Freudenberger Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/s390/crypto/aes_s390.c | 4 +++- arch/s390/crypto/paes_s390.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 526c3f40f6a2..7b66e81e5f87 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -601,7 +601,9 @@ static int ctr_aes_crypt(struct skcipher_request *req) * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { - cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr, + memset(buf, 0, AES_BLOCK_SIZE); + memcpy(buf, walk.src.virt.addr, nbytes); + cpacf_kmctr(sctx->fc, sctx->key, buf, buf, AES_BLOCK_SIZE, walk.iv); memcpy(walk.dst.virt.addr, buf, nbytes); crypto_inc(walk.iv, AES_BLOCK_SIZE); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 621322eb0e68..d84d87349718 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -688,9 +688,11 @@ static int ctr_paes_crypt(struct skcipher_request *req) * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { + memset(buf, 0, AES_BLOCK_SIZE); + memcpy(buf, walk.src.virt.addr, nbytes); while (1) { if (cpacf_kmctr(ctx->fc, ¶m, buf, - walk.src.virt.addr, AES_BLOCK_SIZE, + buf, AES_BLOCK_SIZE, walk.iv) == AES_BLOCK_SIZE) break; if (__paes_convert_key(ctx)) From f4518de40a212c14f043fc9cfeaab3b4076c2cf8 Mon Sep 17 00:00:00 2001 From: Anthony Krowiak Date: Thu, 9 Nov 2023 11:44:20 -0500 Subject: [PATCH 015/187] s390/vfio-ap: unpin pages on gisc registration failure commit 7b2d039da622daa9ba259ac6f38701d542b237c3 upstream. In the vfio_ap_irq_enable function, after the page containing the notification indicator byte (NIB) is pinned, the function attempts to register the guest ISC. If registration fails, the function sets the status response code and returns without unpinning the page containing the NIB. In order to avoid a memory leak, the NIB should be unpinned before returning from the vfio_ap_irq_enable function. Co-developed-by: Janosch Frank Signed-off-by: Janosch Frank Signed-off-by: Anthony Krowiak Reviewed-by: Matthew Rosato Fixes: 783f0a3ccd79 ("s390/vfio-ap: add s390dbf logging to the vfio_ap_irq_enable function") Cc: Link: https://lore.kernel.org/r/20231109164427.460493-2-akrowiak@linux.ibm.com Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/vfio_ap_ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 934515959ebf..b8c933ee13a0 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -425,6 +425,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n", __func__, nisc, isc, q->apqn); + vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1); status.response_code = AP_RESPONSE_INVALID_GISA; return status; } From 8a7729cda2dd276d7a3994638038fb89035b6f2c Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 24 Oct 2023 20:30:15 +0200 Subject: [PATCH 016/187] PM / devfreq: Fix buffer overflow in trans_stat_show commit 08e23d05fa6dc4fc13da0ccf09defdd4bbc92ff4 upstream. Fix buffer overflow in trans_stat_show(). Convert simple snprintf to the more secure scnprintf with size of PAGE_SIZE. Add condition checking if we are exceeding PAGE_SIZE and exit early from loop. Also add at the end a warning that we exceeded PAGE_SIZE and that stats is disabled. Return -EFBIG in the case where we don't have enough space to write the full transition table. Also document in the ABI that this function can return -EFBIG error. Link: https://lore.kernel.org/all/20231024183016.14648-2-ansuelsmth@gmail.com/ Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218041 Fixes: e552bbaf5b98 ("PM / devfreq: Add sysfs node for representing frequency transition information.") Signed-off-by: Christian Marangi Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-devfreq | 3 + drivers/devfreq/devfreq.c | 59 +++++++++++++------ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 5e6b74f30406..1e7e0bb4c14e 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -52,6 +52,9 @@ Description: echo 0 > /sys/class/devfreq/.../trans_stat + If the transition table is bigger than PAGE_SIZE, reading + this will return an -EFBIG error. + What: /sys/class/devfreq/.../available_frequencies Date: October 2012 Contact: Nishanth Menon diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index fe6644f99887..2951a87ccb97 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1687,7 +1687,7 @@ static ssize_t trans_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct devfreq *df = to_devfreq(dev); - ssize_t len; + ssize_t len = 0; int i, j; unsigned int max_state; @@ -1696,7 +1696,7 @@ static ssize_t trans_stat_show(struct device *dev, max_state = df->max_state; if (max_state == 0) - return sprintf(buf, "Not Supported.\n"); + return scnprintf(buf, PAGE_SIZE, "Not Supported.\n"); mutex_lock(&df->lock); if (!df->stop_polling && @@ -1706,31 +1706,52 @@ static ssize_t trans_stat_show(struct device *dev, } mutex_unlock(&df->lock); - len = sprintf(buf, " From : To\n"); - len += sprintf(buf + len, " :"); - for (i = 0; i < max_state; i++) - len += sprintf(buf + len, "%10lu", - df->freq_table[i]); + len += scnprintf(buf + len, PAGE_SIZE - len, " From : To\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " :"); + for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu", + df->freq_table[i]); + } + if (len >= PAGE_SIZE - 1) + return PAGE_SIZE - 1; - len += sprintf(buf + len, " time(ms)\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " time(ms)\n"); for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; if (df->freq_table[i] == df->previous_freq) - len += sprintf(buf + len, "*"); + len += scnprintf(buf + len, PAGE_SIZE - len, "*"); else - len += sprintf(buf + len, " "); + len += scnprintf(buf + len, PAGE_SIZE - len, " "); + if (len >= PAGE_SIZE - 1) + break; - len += sprintf(buf + len, "%10lu:", df->freq_table[i]); - for (j = 0; j < max_state; j++) - len += sprintf(buf + len, "%10u", - df->stats.trans_table[(i * max_state) + j]); - - len += sprintf(buf + len, "%10llu\n", (u64) - jiffies64_to_msecs(df->stats.time_in_state[i])); + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu:", + df->freq_table[i]); + for (j = 0; j < max_state; j++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10u", + df->stats.trans_table[(i * max_state) + j]); + } + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10llu\n", (u64) + jiffies64_to_msecs(df->stats.time_in_state[i])); + } + + if (len < PAGE_SIZE - 1) + len += scnprintf(buf + len, PAGE_SIZE - len, "Total transition : %u\n", + df->stats.total_trans); + + if (len >= PAGE_SIZE - 1) { + pr_warn_once("devfreq transition table exceeds PAGE_SIZE. Disabling\n"); + return -EFBIG; } - len += sprintf(buf + len, "Total transition : %u\n", - df->stats.total_trans); return len; } From e8757cd139ec3b4a22b1415a317ddf37492bbd9c Mon Sep 17 00:00:00 2001 From: Bingbu Cao Date: Wed, 22 Nov 2023 17:46:06 +0800 Subject: [PATCH 017/187] media: imx355: Enable runtime PM before registering async sub-device commit efa5fe19c0a9199f49e36e1f5242ed5c88da617d upstream. As the sensor device maybe accessible right after its async sub-device is registered, such as ipu-bridge will try to power up sensor by sensor's client device's runtime PM from the async notifier callback, if runtime PM is not enabled, it will fail. So runtime PM should be ready before its async sub-device is registered and accessible by others. Fixes: df0b5c4a7ddd ("media: add imx355 camera sensor driver") Cc: stable@vger.kernel.org Signed-off-by: Bingbu Cao Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/imx355.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/media/i2c/imx355.c b/drivers/media/i2c/imx355.c index b46178681c05..7fb02ba56012 100644 --- a/drivers/media/i2c/imx355.c +++ b/drivers/media/i2c/imx355.c @@ -1784,10 +1784,6 @@ static int imx355_probe(struct i2c_client *client) goto error_handler_free; } - ret = v4l2_async_register_subdev_sensor(&imx355->sd); - if (ret < 0) - goto error_media_entity; - /* * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. @@ -1796,9 +1792,15 @@ static int imx355_probe(struct i2c_client *client) pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); + ret = v4l2_async_register_subdev_sensor(&imx355->sd); + if (ret < 0) + goto error_media_entity_runtime_pm; + return 0; -error_media_entity: +error_media_entity_runtime_pm: + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); media_entity_cleanup(&imx355->sd.entity); error_handler_free: From f4bb1d5daf77b1a95a43277268adf0d1430c2346 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Fri, 15 Dec 2023 10:00:49 +0800 Subject: [PATCH 018/187] rpmsg: virtio: Free driver_override when rpmsg_remove() commit d5362c37e1f8a40096452fc201c30e705750e687 upstream. Free driver_override when rpmsg_remove(), otherwise the following memory leak will occur: unreferenced object 0xffff0000d55d7080 (size 128): comm "kworker/u8:2", pid 56, jiffies 4294893188 (age 214.272s) hex dump (first 32 bytes): 72 70 6d 73 67 5f 6e 73 00 00 00 00 00 00 00 00 rpmsg_ns........ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000009c94c9c1>] __kmem_cache_alloc_node+0x1f8/0x320 [<000000002300d89b>] __kmalloc_node_track_caller+0x44/0x70 [<00000000228a60c3>] kstrndup+0x4c/0x90 [<0000000077158695>] driver_set_override+0xd0/0x164 [<000000003e9c4ea5>] rpmsg_register_device_override+0x98/0x170 [<000000001c0c89a8>] rpmsg_ns_register_device+0x24/0x30 [<000000008bbf8fa2>] rpmsg_probe+0x2e0/0x3ec [<00000000e65a68df>] virtio_dev_probe+0x1c0/0x280 [<00000000443331cc>] really_probe+0xbc/0x2dc [<00000000391064b1>] __driver_probe_device+0x78/0xe0 [<00000000a41c9a5b>] driver_probe_device+0xd8/0x160 [<000000009c3bd5df>] __device_attach_driver+0xb8/0x140 [<0000000043cd7614>] bus_for_each_drv+0x7c/0xd4 [<000000003b929a36>] __device_attach+0x9c/0x19c [<00000000a94e0ba8>] device_initial_probe+0x14/0x20 [<000000003c999637>] bus_probe_device+0xa0/0xac Signed-off-by: Xiaolei Wang Fixes: b0b03b811963 ("rpmsg: Release rpmsg devices in backends") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231215020049.78750-1-xiaolei.wang@windriver.com Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/virtio_rpmsg_bus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 905ac7910c98..f1af0f674615 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -378,6 +378,7 @@ static void virtio_rpmsg_release_device(struct device *dev) struct rpmsg_device *rpdev = to_rpmsg_device(dev); struct virtio_rpmsg_channel *vch = to_virtio_rpmsg_channel(rpdev); + kfree(rpdev->driver_override); kfree(vch); } From 51a7c02bc74266f389517d9099d581ce8387c078 Mon Sep 17 00:00:00 2001 From: Bingbu Cao Date: Wed, 22 Nov 2023 17:46:09 +0800 Subject: [PATCH 019/187] media: ov9734: Enable runtime PM before registering async sub-device commit e242e9c144050ed120cf666642ba96b7c4462a4c upstream. As the sensor device maybe accessible right after its async sub-device is registered, such as ipu-bridge will try to power up sensor by sensor's client device's runtime PM from the async notifier callback, if runtime PM is not enabled, it will fail. So runtime PM should be ready before its async sub-device is registered and accessible by others. Fixes: d3f863a63fe4 ("media: i2c: Add ov9734 image sensor driver") Cc: stable@vger.kernel.org Signed-off-by: Bingbu Cao Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ov9734.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/media/i2c/ov9734.c b/drivers/media/i2c/ov9734.c index 8b0a158cb297..c5592ddb0cff 100644 --- a/drivers/media/i2c/ov9734.c +++ b/drivers/media/i2c/ov9734.c @@ -939,6 +939,7 @@ static void ov9734_remove(struct i2c_client *client) media_entity_cleanup(&sd->entity); v4l2_ctrl_handler_free(sd->ctrl_handler); pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); mutex_destroy(&ov9734->mutex); } @@ -984,13 +985,6 @@ static int ov9734_probe(struct i2c_client *client) goto probe_error_v4l2_ctrl_handler_free; } - ret = v4l2_async_register_subdev_sensor(&ov9734->sd); - if (ret < 0) { - dev_err(&client->dev, "failed to register V4L2 subdev: %d", - ret); - goto probe_error_media_entity_cleanup; - } - /* * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. @@ -999,9 +993,18 @@ static int ov9734_probe(struct i2c_client *client) pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); + ret = v4l2_async_register_subdev_sensor(&ov9734->sd); + if (ret < 0) { + dev_err(&client->dev, "failed to register V4L2 subdev: %d", + ret); + goto probe_error_media_entity_cleanup_pm; + } + return 0; -probe_error_media_entity_cleanup: +probe_error_media_entity_cleanup_pm: + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); media_entity_cleanup(&ov9734->sd.entity); probe_error_v4l2_ctrl_handler_free: From d6b8d034b576f406af920a7bee81606c027b24c6 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 15 Jan 2024 13:54:31 -0500 Subject: [PATCH 020/187] s390/vfio-ap: always filter entire AP matrix commit 850fb7fa8c684a4c6bf0e4b6978f4ddcc5d43d11 upstream. The vfio_ap_mdev_filter_matrix function is called whenever a new adapter or domain is assigned to the mdev. The purpose of the function is to update the guest's AP configuration by filtering the matrix of adapters and domains assigned to the mdev. When an adapter or domain is assigned, only the APQNs associated with the APID of the new adapter or APQI of the new domain are inspected. If an APQN does not reference a queue device bound to the vfio_ap device driver, then it's APID will be filtered from the mdev's matrix when updating the guest's AP configuration. Inspecting only the APID of the new adapter or APQI of the new domain will result in passing AP queues through to a guest that are not bound to the vfio_ap device driver under certain circumstances. Consider the following: guest's AP configuration (all also assigned to the mdev's matrix): 14.0004 14.0005 14.0006 16.0004 16.0005 16.0006 unassign domain 4 unbind queue 16.0005 assign domain 4 When domain 4 is re-assigned, since only domain 4 will be inspected, the APQNs that will be examined will be: 14.0004 16.0004 Since both of those APQNs reference queue devices that are bound to the vfio_ap device driver, nothing will get filtered from the mdev's matrix when updating the guest's AP configuration. Consequently, queue 16.0005 will get passed through despite not being bound to the driver. This violates the linux device model requirement that a guest shall only be given access to devices bound to the device driver facilitating their pass-through. To resolve this problem, every adapter and domain assigned to the mdev will be inspected when filtering the mdev's matrix. Signed-off-by: Tony Krowiak Acked-by: Halil Pasic Fixes: 48cae940c31d ("s390/vfio-ap: refresh guest's APCB by filtering AP resources assigned to mdev") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240115185441.31526-2-akrowiak@linux.ibm.com Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/vfio_ap_ops.c | 55 +++++++++---------------------- 1 file changed, 16 insertions(+), 39 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index b8c933ee13a0..b0af16f1f50e 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -639,8 +639,7 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) * Return: a boolean value indicating whether the KVM guest's APCB was changed * by the filtering or not. */ -static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, - struct ap_matrix_mdev *matrix_mdev) +static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev) { unsigned long apid, apqi, apqn; DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES); @@ -661,8 +660,8 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm, (unsigned long *)matrix_dev->info.aqm, AP_DOMAINS); - for_each_set_bit_inv(apid, apm, AP_DEVICES) { - for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { + for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) { + for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) { /* * If the APQN is not bound to the vfio_ap device * driver, then we can't assign it to the guest's @@ -931,7 +930,6 @@ static ssize_t assign_adapter_store(struct device *dev, { int ret; unsigned long apid; - DECLARE_BITMAP(apm_delta, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ -960,11 +958,8 @@ static ssize_t assign_adapter_store(struct device *dev, } vfio_ap_mdev_link_adapter(matrix_mdev, apid); - memset(apm_delta, 0, sizeof(apm_delta)); - set_bit_inv(apid, apm_delta); - if (vfio_ap_mdev_filter_matrix(apm_delta, - matrix_mdev->matrix.aqm, matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev)) vfio_ap_mdev_update_guest_apcb(matrix_mdev); ret = count; @@ -1140,7 +1135,6 @@ static ssize_t assign_domain_store(struct device *dev, { int ret; unsigned long apqi; - DECLARE_BITMAP(aqm_delta, AP_DOMAINS); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ -1169,11 +1163,8 @@ static ssize_t assign_domain_store(struct device *dev, } vfio_ap_mdev_link_domain(matrix_mdev, apqi); - memset(aqm_delta, 0, sizeof(aqm_delta)); - set_bit_inv(apqi, aqm_delta); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm_delta, - matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev)) vfio_ap_mdev_update_guest_apcb(matrix_mdev); ret = count; @@ -1859,9 +1850,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) if (matrix_mdev) { vfio_ap_mdev_link_queue(matrix_mdev, q); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, - matrix_mdev->matrix.aqm, - matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev)) vfio_ap_mdev_update_guest_apcb(matrix_mdev); } dev_set_drvdata(&apdev->device, q); @@ -2212,34 +2201,22 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info, static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev) { - bool do_hotplug = false; - int filter_domains = 0; - int filter_adapters = 0; - DECLARE_BITMAP(apm, AP_DEVICES); - DECLARE_BITMAP(aqm, AP_DOMAINS); + bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false; mutex_lock(&matrix_mdev->kvm->lock); mutex_lock(&matrix_dev->mdevs_lock); - filter_adapters = bitmap_and(apm, matrix_mdev->matrix.apm, - matrix_mdev->apm_add, AP_DEVICES); - filter_domains = bitmap_and(aqm, matrix_mdev->matrix.aqm, - matrix_mdev->aqm_add, AP_DOMAINS); + filter_adapters = bitmap_intersects(matrix_mdev->matrix.apm, + matrix_mdev->apm_add, AP_DEVICES); + filter_domains = bitmap_intersects(matrix_mdev->matrix.aqm, + matrix_mdev->aqm_add, AP_DOMAINS); + filter_cdoms = bitmap_intersects(matrix_mdev->matrix.adm, + matrix_mdev->adm_add, AP_DOMAINS); - if (filter_adapters && filter_domains) - do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev); - else if (filter_adapters) - do_hotplug |= - vfio_ap_mdev_filter_matrix(apm, - matrix_mdev->shadow_apcb.aqm, - matrix_mdev); - else - do_hotplug |= - vfio_ap_mdev_filter_matrix(matrix_mdev->shadow_apcb.apm, - aqm, matrix_mdev); + if (filter_adapters || filter_domains) + do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev); - if (bitmap_intersects(matrix_mdev->matrix.adm, matrix_mdev->adm_add, - AP_DOMAINS)) + if (filter_cdoms) do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev); if (do_hotplug) From baf3fcb268fd379a4b5d73d7cd294ee2c02a93f3 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 15 Jan 2024 13:54:32 -0500 Subject: [PATCH 021/187] s390/vfio-ap: loop over the shadow APCB when filtering guest's AP configuration commit 16fb78cbf56e42b8efb2682a4444ab59e32e7959 upstream. While filtering the mdev matrix, it doesn't make sense - and will have unexpected results - to filter an APID from the matrix if the APID or one of the associated APQIs is not in the host's AP configuration. There are two reasons for this: 1. An adapter or domain that is not in the host's AP configuration can be assigned to the matrix; this is known as over-provisioning. Queue devices, however, are only created for adapters and domains in the host's AP configuration, so there will be no queues associated with an over-provisioned adapter or domain to filter. 2. The adapter or domain may have been externally removed from the host's configuration via an SE or HMC attached to a DPM enabled LPAR. In this case, the vfio_ap device driver would have been notified by the AP bus via the on_config_changed callback and the adapter or domain would have already been filtered. Since the matrix_mdev->shadow_apcb.apm and matrix_mdev->shadow_apcb.aqm are copied from the mdev matrix sans the APIDs and APQIs not in the host's AP configuration, let's loop over those bitmaps instead of those assigned to the matrix. Signed-off-by: Tony Krowiak Reviewed-by: Halil Pasic Fixes: 48cae940c31d ("s390/vfio-ap: refresh guest's APCB by filtering AP resources assigned to mdev") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240115185441.31526-3-akrowiak@linux.ibm.com Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/vfio_ap_ops.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index b0af16f1f50e..50c7d519b3a6 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -660,8 +660,9 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev) bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm, (unsigned long *)matrix_dev->info.aqm, AP_DOMAINS); - for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) { - for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) { + for_each_set_bit_inv(apid, matrix_mdev->shadow_apcb.apm, AP_DEVICES) { + for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, + AP_DOMAINS) { /* * If the APQN is not bound to the vfio_ap device * driver, then we can't assign it to the guest's From 6690a0acbbd99f71cb88a31929e05e302a614ba8 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 15 Jan 2024 13:54:33 -0500 Subject: [PATCH 022/187] s390/vfio-ap: let on_scan_complete() callback filter matrix and update guest's APCB commit 774d10196e648e2c0b78da817f631edfb3dfa557 upstream. When adapters and/or domains are added to the host's AP configuration, this may result in multiple queue devices getting created and probed by the vfio_ap device driver. For each queue device probed, the matrix of adapters and domains assigned to a matrix mdev will be filtered to update the guest's APCB. If any adapters or domains get added to or removed from the APCB, the guest's AP configuration will be dynamically updated (i.e., hot plug/unplug). To dynamically update the guest's configuration, its VCPUs must be taken out of SIE for the period of time it takes to make the update. This is disruptive to the guest's operation and if there are many queues probed due to a change in the host's AP configuration, this could be troublesome. The problem is exacerbated by the fact that the 'on_scan_complete' callback also filters the mdev's matrix and updates the guest's AP configuration. In order to reduce the potential amount of disruption to the guest that may result from a change to the host's AP configuration, let's bypass the filtering of the matrix and updating of the guest's AP configuration in the probe callback - if due to a host config change - and defer it until the 'on_scan_complete' callback is invoked after the AP bus finishes its device scan operation. This way the filtering and updating will be performed only once regardless of the number of queues added. Signed-off-by: Tony Krowiak Reviewed-by: Halil Pasic Fixes: 48cae940c31d ("s390/vfio-ap: refresh guest's APCB by filtering AP resources assigned to mdev") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240115185441.31526-4-akrowiak@linux.ibm.com Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/vfio_ap_ops.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 50c7d519b3a6..0659aa2863ab 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1851,9 +1851,22 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) if (matrix_mdev) { vfio_ap_mdev_link_queue(matrix_mdev, q); + /* + * If we're in the process of handling the adding of adapters or + * domains to the host's AP configuration, then let the + * vfio_ap device driver's on_scan_complete callback filter the + * matrix and update the guest's AP configuration after all of + * the new queue devices are probed. + */ + if (!bitmap_empty(matrix_mdev->apm_add, AP_DEVICES) || + !bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS)) + goto done; + if (vfio_ap_mdev_filter_matrix(matrix_mdev)) vfio_ap_mdev_update_guest_apcb(matrix_mdev); } + +done: dev_set_drvdata(&apdev->device, q); release_update_locks_for_mdev(matrix_mdev); From 574f69b46b232742737e68892d91da89dbf59d6e Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sat, 2 Dec 2023 14:14:20 +0300 Subject: [PATCH 023/187] mips: Fix max_mapnr being uninitialized on early stages commit e1a9ae45736989c972a8d1c151bc390678ae6205 upstream. max_mapnr variable is utilized in the pfn_valid() method in order to determine the upper PFN space boundary. Having it uninitialized effectively makes any PFN passed to that method invalid. That in its turn causes the kernel mm-subsystem occasion malfunctions even after the max_mapnr variable is actually properly updated. For instance, pfn_valid() is called in the init_unavailable_range() method in the framework of the calls-chain on MIPS: setup_arch() +-> paging_init() +-> free_area_init() +-> memmap_init() +-> memmap_init_zone_range() +-> init_unavailable_range() Since pfn_valid() always returns "false" value before max_mapnr is initialized in the mem_init() method, any flatmem page-holes will be left in the poisoned/uninitialized state including the IO-memory pages. Thus any further attempts to map/remap the IO-memory by using MMU may fail. In particular it happened in my case on attempt to map the SRAM region. The kernel bootup procedure just crashed on the unhandled unaligned access bug raised in the __update_cache() method: > Unhandled kernel unaligned access[#1]: > CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.7.0-rc1-XXX-dirty #2056 > ... > Call Trace: > [<8011ef9c>] __update_cache+0x88/0x1bc > [<80385944>] ioremap_page_range+0x110/0x2a4 > [<80126948>] ioremap_prot+0x17c/0x1f4 > [<80711b80>] __devm_ioremap+0x8c/0x120 > [<80711e0c>] __devm_ioremap_resource+0xf4/0x218 > [<808bf244>] sram_probe+0x4f4/0x930 > [<80889d20>] platform_probe+0x68/0xec > ... Let's fix the problem by initializing the max_mapnr variable as soon as the required data is available. In particular it can be done right in the paging_init() method before free_area_init() is called since all the PFN zone boundaries have already been calculated by that time. Cc: stable@vger.kernel.org Signed-off-by: Serge Semin Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/init.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 5a8002839550..e8660d06f663 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -417,7 +417,12 @@ void __init paging_init(void) (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10)); max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; } + + max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; +#else + max_mapnr = max_low_pfn; #endif + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); free_area_init(max_zone_pfns); } @@ -453,13 +458,6 @@ void __init mem_init(void) */ BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (_PFN_SHIFT > PAGE_SHIFT)); -#ifdef CONFIG_HIGHMEM - max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; -#else - max_mapnr = max_low_pfn; -#endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - maar_init(); memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. */ From 2df39ac8f813860f79782807c3f7acff40b3c551 Mon Sep 17 00:00:00 2001 From: Krishna chaitanya chundru Date: Tue, 31 Oct 2023 15:21:05 +0530 Subject: [PATCH 024/187] bus: mhi: host: Add alignment check for event ring read pointer commit eff9704f5332a13b08fbdbe0f84059c9e7051d5f upstream. Though we do check the event ring read pointer by "is_valid_ring_ptr" to make sure it is in the buffer range, but there is another risk the pointer may be not aligned. Since we are expecting event ring elements are 128 bits(struct mhi_ring_element) aligned, an unaligned read pointer could lead to multiple issues like DoS or ring buffer memory corruption. So add a alignment check for event ring read pointer. Fixes: ec32332df764 ("bus: mhi: core: Sanity check values from remote device before use") cc: stable@vger.kernel.org Signed-off-by: Krishna chaitanya chundru Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20231031-alignment_check-v2-1-1441db7c5efd@quicinc.com Signed-off-by: Manivannan Sadhasivam Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/host/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c index 0c3a009ed9bb..3743b43eceda 100644 --- a/drivers/bus/mhi/host/main.c +++ b/drivers/bus/mhi/host/main.c @@ -268,7 +268,8 @@ static void mhi_del_ring_element(struct mhi_controller *mhi_cntrl, static bool is_valid_ring_ptr(struct mhi_ring *ring, dma_addr_t addr) { - return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len; + return addr >= ring->iommu_base && addr < ring->iommu_base + ring->len && + !(addr & (sizeof(struct mhi_ring_element) - 1)); } int mhi_destroy_device(struct device *dev, void *data) From 3c5ec66b4b3f6816f3a6161538672e389e537690 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Mon, 11 Dec 2023 14:42:52 +0800 Subject: [PATCH 025/187] bus: mhi: host: Drop chan lock before queuing buffers commit 01bd694ac2f682fb8017e16148b928482bc8fa4b upstream. Ensure read and write locks for the channel are not taken in succession by dropping the read lock from parse_xfer_event() such that a callback given to client can potentially queue buffers and acquire the write lock in that process. Any queueing of buffers should be done without channel read lock acquired as it can result in multiple locks and a soft lockup. Cc: # 5.7 Fixes: 1d3173a3bae7 ("bus: mhi: core: Add support for processing events from client device") Signed-off-by: Qiang Yu Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1702276972-41296-3-git-send-email-quic_qianyu@quicinc.com [mani: added fixes tag and cc'ed stable] Signed-off-by: Manivannan Sadhasivam Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/host/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c index 3743b43eceda..a60ba20b7d8b 100644 --- a/drivers/bus/mhi/host/main.c +++ b/drivers/bus/mhi/host/main.c @@ -643,6 +643,8 @@ static int parse_xfer_event(struct mhi_controller *mhi_cntrl, mhi_del_ring_element(mhi_cntrl, tre_ring); local_rp = tre_ring->rp; + read_unlock_bh(&mhi_chan->lock); + /* notify client */ mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result); @@ -668,6 +670,8 @@ static int parse_xfer_event(struct mhi_controller *mhi_cntrl, kfree(buf_info->cb_buf); } } + + read_lock_bh(&mhi_chan->lock); } break; } /* CC_EOT */ From 0b093176fd0967a5f56e2c86b0d48247f6c0fa0f Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Mon, 11 Dec 2023 14:42:51 +0800 Subject: [PATCH 026/187] bus: mhi: host: Add spinlock to protect WP access when queueing TREs commit b89b6a863dd53bc70d8e52d50f9cfaef8ef5e9c9 upstream. Protect WP accesses such that multiple threads queueing buffers for incoming data do not race. Meanwhile, if CONFIG_TRACE_IRQFLAGS is enabled, irq will be enabled once __local_bh_enable_ip is called as part of write_unlock_bh. Hence, let's take irqsave lock after TRE is generated to avoid running write_unlock_bh when irqsave lock is held. Cc: stable@vger.kernel.org Fixes: 189ff97cca53 ("bus: mhi: core: Add support for data transfer") Signed-off-by: Bhaumik Bhatt Signed-off-by: Qiang Yu Reviewed-by: Jeffrey Hugo Tested-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1702276972-41296-2-git-send-email-quic_qianyu@quicinc.com Signed-off-by: Manivannan Sadhasivam Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/host/main.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c index a60ba20b7d8b..8378c3319cd5 100644 --- a/drivers/bus/mhi/host/main.c +++ b/drivers/bus/mhi/host/main.c @@ -1124,17 +1124,15 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info, if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))) return -EIO; - read_lock_irqsave(&mhi_cntrl->pm_lock, flags); - ret = mhi_is_ring_full(mhi_cntrl, tre_ring); - if (unlikely(ret)) { - ret = -EAGAIN; - goto exit_unlock; - } + if (unlikely(ret)) + return -EAGAIN; ret = mhi_gen_tre(mhi_cntrl, mhi_chan, buf_info, mflags); if (unlikely(ret)) - goto exit_unlock; + return ret; + + read_lock_irqsave(&mhi_cntrl->pm_lock, flags); /* Packet is queued, take a usage ref to exit M3 if necessary * for host->device buffer, balanced put is done on buffer completion @@ -1154,7 +1152,6 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info, if (dir == DMA_FROM_DEVICE) mhi_cntrl->runtime_put(mhi_cntrl); -exit_unlock: read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags); return ret; @@ -1206,6 +1203,9 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, int eot, eob, chain, bei; int ret; + /* Protect accesses for reading and incrementing WP */ + write_lock_bh(&mhi_chan->lock); + buf_ring = &mhi_chan->buf_ring; tre_ring = &mhi_chan->tre_ring; @@ -1223,8 +1223,10 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, if (!info->pre_mapped) { ret = mhi_cntrl->map_single(mhi_cntrl, buf_info); - if (ret) + if (ret) { + write_unlock_bh(&mhi_chan->lock); return ret; + } } eob = !!(flags & MHI_EOB); @@ -1241,6 +1243,8 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan, mhi_add_ring_element(mhi_cntrl, tre_ring); mhi_add_ring_element(mhi_cntrl, buf_ring); + write_unlock_bh(&mhi_chan->lock); + return 0; } From 71602d95ae6401382a359ca7fced6c7a9dda0104 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 3 Jan 2024 21:02:16 +0100 Subject: [PATCH 027/187] parisc/firmware: Fix F-extend for PDC addresses commit 735ae74f73e55c191d48689bd11ff4a06ea0508f upstream. When running with narrow firmware (64-bit kernel using a 32-bit firmware), extend PDC addresses into the 0xfffffff0.00000000 region instead of the 0xf0f0f0f0.00000000 region. This fixes the power button on the C3700 machine in qemu (64-bit CPU with 32-bit firmware), and my assumption is that the previous code was really never used (because most 64-bit machines have a 64-bit firmware), or it just worked on very old machines because they may only decode 40-bit of virtual addresses. Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/firmware.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 3e051a973e9b..92276bcf19d7 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -123,10 +123,10 @@ static unsigned long f_extend(unsigned long address) #ifdef CONFIG_64BIT if(unlikely(parisc_narrow_firmware)) { if((address & 0xff000000) == 0xf0000000) - return 0xf0f0f0f000000000UL | (u32)address; + return (0xfffffff0UL << 32) | (u32)address; if((address & 0xf0000000) == 0xf0000000) - return 0xffffffff00000000UL | (u32)address; + return (0xffffffffUL << 32) | (u32)address; } #endif return address; From b37c1b0db14db0db487afa003e9fc0f47beb98de Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 3 Jan 2024 21:17:23 +0100 Subject: [PATCH 028/187] parisc/power: Fix power soft-off button emulation on qemu commit 6472036581f947109b20664121db1d143e916f0b upstream. Make sure to start the kthread to check the power button on qemu as well if the power button address was provided. This fixes the qemu built-in system_powerdown runtime command. Fixes: d0c219472980 ("parisc/power: Add power soft-off when running on qemu") Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index 332bcc0053a5..498bae2e3403 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -238,7 +238,7 @@ static int __init power_init(void) if (running_on_qemu && soft_power_reg) register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, SYS_OFF_PRIO_DEFAULT, qemu_power_off, (void *)soft_power_reg); - else + if (!running_on_qemu || soft_power_reg) power_task = kthread_run(kpowerswd, (void*)soft_power_reg, KTHREAD_NAME); if (IS_ERR(power_task)) { From 6e8aab4de7a9be8116b41c424300edb8f2ad05cf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 27 Dec 2023 21:37:02 +0100 Subject: [PATCH 029/187] async: Split async_schedule_node_domain() commit 6aa09a5bccd8e224d917afdb4c278fc66aacde4d upstream. In preparation for subsequent changes, split async_schedule_node_domain() in two pieces so as to allow the bottom part of it to be called from a somewhat different code path. No functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Stanislaw Gruszka Tested-by: Youngmin Nam Reviewed-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- kernel/async.c | 56 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/kernel/async.c b/kernel/async.c index b2c4ba5686ee..cffe6b4cff9f 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -145,6 +145,39 @@ static void async_run_entry_fn(struct work_struct *work) wake_up(&async_done); } +static async_cookie_t __async_schedule_node_domain(async_func_t func, + void *data, int node, + struct async_domain *domain, + struct async_entry *entry) +{ + async_cookie_t newcookie; + unsigned long flags; + + INIT_LIST_HEAD(&entry->domain_list); + INIT_LIST_HEAD(&entry->global_list); + INIT_WORK(&entry->work, async_run_entry_fn); + entry->func = func; + entry->data = data; + entry->domain = domain; + + spin_lock_irqsave(&async_lock, flags); + + /* allocate cookie and queue */ + newcookie = entry->cookie = next_cookie++; + + list_add_tail(&entry->domain_list, &domain->pending); + if (domain->registered) + list_add_tail(&entry->global_list, &async_global_pending); + + atomic_inc(&entry_count); + spin_unlock_irqrestore(&async_lock, flags); + + /* schedule for execution */ + queue_work_node(node, system_unbound_wq, &entry->work); + + return newcookie; +} + /** * async_schedule_node_domain - NUMA specific version of async_schedule_domain * @func: function to execute asynchronously @@ -186,29 +219,8 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, func(data, newcookie); return newcookie; } - INIT_LIST_HEAD(&entry->domain_list); - INIT_LIST_HEAD(&entry->global_list); - INIT_WORK(&entry->work, async_run_entry_fn); - entry->func = func; - entry->data = data; - entry->domain = domain; - spin_lock_irqsave(&async_lock, flags); - - /* allocate cookie and queue */ - newcookie = entry->cookie = next_cookie++; - - list_add_tail(&entry->domain_list, &domain->pending); - if (domain->registered) - list_add_tail(&entry->global_list, &async_global_pending); - - atomic_inc(&entry_count); - spin_unlock_irqrestore(&async_lock, flags); - - /* schedule for execution */ - queue_work_node(node, system_unbound_wq, &entry->work); - - return newcookie; + return __async_schedule_node_domain(func, data, node, domain, entry); } EXPORT_SYMBOL_GPL(async_schedule_node_domain); From fcf8e37152505e158d77da3f578b01d958a06468 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 27 Dec 2023 21:38:23 +0100 Subject: [PATCH 030/187] async: Introduce async_schedule_dev_nocall() commit 7d4b5d7a37bdd63a5a3371b988744b060d5bb86f upstream. In preparation for subsequent changes, introduce a specialized variant of async_schedule_dev() that will not invoke the argument function synchronously when it cannot be scheduled for asynchronous execution. The new function, async_schedule_dev_nocall(), will be used for fixing possible deadlocks in the system-wide power management core code. Signed-off-by: Rafael J. Wysocki Reviewed-by: Stanislaw Gruszka for the series. Tested-by: Youngmin Nam Reviewed-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- include/linux/async.h | 2 ++ kernel/async.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/include/linux/async.h b/include/linux/async.h index cce4ad31e8fc..33c9ff4afb49 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -90,6 +90,8 @@ async_schedule_dev(async_func_t func, struct device *dev) return async_schedule_node(func, dev, dev_to_node(dev)); } +bool async_schedule_dev_nocall(async_func_t func, struct device *dev); + /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously diff --git a/kernel/async.c b/kernel/async.c index cffe6b4cff9f..673bba6bdf3a 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -243,6 +243,35 @@ async_cookie_t async_schedule_node(async_func_t func, void *data, int node) } EXPORT_SYMBOL_GPL(async_schedule_node); +/** + * async_schedule_dev_nocall - A simplified variant of async_schedule_dev() + * @func: function to execute asynchronously + * @dev: device argument to be passed to function + * + * @dev is used as both the argument for the function and to provide NUMA + * context for where to run the function. + * + * If the asynchronous execution of @func is scheduled successfully, return + * true. Otherwise, do nothing and return false, unlike async_schedule_dev() + * that will run the function synchronously then. + */ +bool async_schedule_dev_nocall(async_func_t func, struct device *dev) +{ + struct async_entry *entry; + + entry = kzalloc(sizeof(struct async_entry), GFP_KERNEL); + + /* Give up if there is no memory or too much work. */ + if (!entry || atomic_read(&entry_count) > MAX_WORK) { + kfree(entry); + return false; + } + + __async_schedule_node_domain(func, dev, dev_to_node(dev), + &async_dfl_domain, entry); + return true; +} + /** * async_synchronize_full - synchronize all asynchronous function calls * From 0c8ada71d98033c796a7e7715152107c9c5e61c5 Mon Sep 17 00:00:00 2001 From: Marcelo Schmitt Date: Tue, 19 Dec 2023 17:26:27 -0300 Subject: [PATCH 031/187] iio: adc: ad7091r: Enable internal vref if external vref is not supplied [ Upstream commit e71c5c89bcb165a02df35325aa13d1ee40112401 ] The ADC needs a voltage reference to work correctly. Users can provide an external voltage reference or use the chip internal reference to operate the ADC. The availability of an in chip reference for the ADC saves the user from having to supply an external voltage reference, which makes the external reference an optional property as described in the device tree documentation. Though, to use the internal reference, it must be enabled by writing to the configuration register. Enable AD7091R internal voltage reference if no external vref is supplied. Fixes: 260442cc5be4 ("iio: adc: ad7091r5: Add scale and external VREF support") Signed-off-by: Marcelo Schmitt Link: https://lore.kernel.org/r/b865033fa6a4fc4bf2b4a98ec51a6144e0f64f77.1703013352.git.marcelo.schmitt1@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/ad7091r-base.c | 7 +++++++ drivers/iio/adc/ad7091r-base.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/drivers/iio/adc/ad7091r-base.c b/drivers/iio/adc/ad7091r-base.c index 3d36bcd26b0c..76002b91c86a 100644 --- a/drivers/iio/adc/ad7091r-base.c +++ b/drivers/iio/adc/ad7091r-base.c @@ -405,7 +405,14 @@ int ad7091r_probe(struct device *dev, const char *name, if (IS_ERR(st->vref)) { if (PTR_ERR(st->vref) == -EPROBE_DEFER) return -EPROBE_DEFER; + st->vref = NULL; + /* Enable internal vref */ + ret = regmap_set_bits(st->map, AD7091R_REG_CONF, + AD7091R_REG_CONF_INT_VREF); + if (ret) + return dev_err_probe(st->dev, ret, + "Error on enable internal reference\n"); } else { ret = regulator_enable(st->vref); if (ret) diff --git a/drivers/iio/adc/ad7091r-base.h b/drivers/iio/adc/ad7091r-base.h index 7a78976a2f80..b9e1c8bf3440 100644 --- a/drivers/iio/adc/ad7091r-base.h +++ b/drivers/iio/adc/ad7091r-base.h @@ -8,6 +8,8 @@ #ifndef __DRIVERS_IIO_ADC_AD7091R_BASE_H__ #define __DRIVERS_IIO_ADC_AD7091R_BASE_H__ +#define AD7091R_REG_CONF_INT_VREF BIT(0) + /* AD7091R_REG_CH_LIMIT */ #define AD7091R_HIGH_LIMIT 0xFFF #define AD7091R_LOW_LIMIT 0x0 From 2ab32986a0b9e329eb7f8f04dd57cc127f797c08 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 13 Dec 2023 17:04:52 +0100 Subject: [PATCH 032/187] dmaengine: fix NULL pointer in channel unregistration function [ Upstream commit f5c24d94512f1b288262beda4d3dcb9629222fc7 ] __dma_async_device_channel_register() can fail. In case of failure, chan->local is freed (with free_percpu()), and chan->local is nullified. When dma_async_device_unregister() is called (because of managed API or intentionally by DMA controller driver), channels are unconditionally unregistered, leading to this NULL pointer: [ 1.318693] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d0 [...] [ 1.484499] Call trace: [ 1.486930] device_del+0x40/0x394 [ 1.490314] device_unregister+0x20/0x7c [ 1.494220] __dma_async_device_channel_unregister+0x68/0xc0 Look at dma_async_device_register() function error path, channel device unregistration is done only if chan->local is not NULL. Then add the same condition at the beginning of __dma_async_device_channel_unregister() function, to avoid NULL pointer issue whatever the API used to reach this function. Fixes: d2fb0a043838 ("dmaengine: break out channel registration") Signed-off-by: Amelie Delaunay Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20231213160452.2598073-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dmaengine.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 8a6e6b60d66f..892e8389232e 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1103,6 +1103,9 @@ EXPORT_SYMBOL_GPL(dma_async_device_channel_register); static void __dma_async_device_channel_unregister(struct dma_device *device, struct dma_chan *chan) { + if (chan->local == NULL) + return; + WARN_ONCE(!device->device_release && chan->client_count, "%s called while %d clients hold a reference\n", __func__, chan->client_count); From 9f29c5d2bf22c726b682e8928e2587843d8ca27f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 Dec 2023 14:52:15 -0800 Subject: [PATCH 033/187] scsi: ufs: core: Remove the ufshcd_hba_exit() call from ufshcd_async_scan() [ Upstream commit ee36710912b2075c417100a8acc642c9c6496501 ] Calling ufshcd_hba_exit() from a function that is called asynchronously from ufshcd_init() is wrong because this triggers multiple race conditions. Instead of calling ufshcd_hba_exit(), log an error message. Reported-by: Daniel Mentz Fixes: 1d337ec2f35e ("ufs: improve init sequence") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20231218225229.2542156-3-bvanassche@acm.org Reviewed-by: Can Guo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 474e94a69b18..9fd4e9ed93b8 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8333,12 +8333,9 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) out: pm_runtime_put_sync(hba->dev); - /* - * If we failed to initialize the device or the device is not - * present, turn off the power/clocks etc. - */ + if (ret) - ufshcd_hba_exit(hba); + dev_err(hba->dev, "%s failed: %d\n", __func__, ret); } static const struct attribute_group *ufshcd_driver_groups[] = { From 8191aa4146d92e8e07c35956e5b47617aa09d264 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 20 Nov 2023 17:43:23 +0100 Subject: [PATCH 034/187] arm64: dts: qcom: sc7180: fix USB wakeup interrupt types commit 9b956999bf725fd62613f719c3178fdbee6e5f47 upstream. The DP/DM wakeup interrupts are edge triggered and which edge to trigger on depends on use-case and whether a Low speed or Full/High speed device is connected. Fixes: 0b766e7fe5a2 ("arm64: dts: qcom: sc7180: Add USB related nodes") Cc: stable@vger.kernel.org # 5.10 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231120164331.8116-4-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sc7180.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 78e537f1d796..13fe1c92bf35 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -2769,8 +2769,8 @@ usb_1: usb@a6f8800 { interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 8 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 9 IRQ_TYPE_LEVEL_HIGH>; + <&pdc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; From 595d35c6ae7e834212a306c767b885fc3d688225 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 20 Nov 2023 17:43:28 +0100 Subject: [PATCH 035/187] arm64: dts: qcom: sdm845: fix USB wakeup interrupt types commit 84ad9ac8d9ca29033d589e79a991866b38e23b85 upstream. The DP/DM wakeup interrupts are edge triggered and which edge to trigger on depends on use-case and whether a Low speed or Full/High speed device is connected. Fixes: ca4db2b538a1 ("arm64: dts: qcom: sdm845: Add USB-related nodes") Cc: stable@vger.kernel.org # 4.20 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231120164331.8116-9-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 1e6841902900..3c50580eeb90 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4050,8 +4050,8 @@ usb_1: usb@a6f8800 { interrupts = , , - , - ; + , + ; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -4101,8 +4101,8 @@ usb_2: usb@a8f8800 { interrupts = , , - , - ; + , + ; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; From eec1f92949a6f72e4654b44c903c9ce5e09bdb7c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 20 Nov 2023 17:43:30 +0100 Subject: [PATCH 036/187] arm64: dts: qcom: sm8150: fix USB wakeup interrupt types commit 54524b6987d1fffe64cbf3dded1b2fa6b903edf9 upstream. The DP/DM wakeup interrupts are edge triggered and which edge to trigger on depends on use-case and whether a Low speed or Full/High speed device is connected. Fixes: 0c9dde0d2015 ("arm64: dts: qcom: sm8150: Add secondary USB and PHY nodes") Fixes: b33d2868e8d3 ("arm64: dts: qcom: sm8150: Add USB and PHY device nodes") Cc: stable@vger.kernel.org # 5.10 Cc: Jonathan Marek Cc: Jack Pham Signed-off-by: Johan Hovold Reviewed-by: Jack Pham Link: https://lore.kernel.org/r/20231120164331.8116-11-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index c3c12b0cd416..b7e8663f6b58 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3630,8 +3630,8 @@ usb_1: usb@a6f8800 { interrupts = , , - , - ; + , + ; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -3679,8 +3679,8 @@ usb_2: usb@a8f8800 { interrupts = , , - , - ; + , + ; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; From 69ee126bbae099f56a66b0473ae92d39dc95d800 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 20 Nov 2023 17:43:24 +0100 Subject: [PATCH 037/187] arm64: dts: qcom: sc7280: fix usb_1 wakeup interrupt types commit c34199d967a946e55381404fa949382691737521 upstream. A recent cleanup reordering the usb_1 wakeup interrupts inadvertently switched the DP and SuperSpeed interrupt trigger types. Fixes: 4a7ffc10d195 ("arm64: dts: qcom: align DWC3 USB interrupts with DT schema") Cc: stable@vger.kernel.org # 5.19 Cc: Krzysztof Kozlowski Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231120164331.8116-5-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 7fc8c2045022..04106d725400 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -3664,9 +3664,9 @@ usb_1: usb@a6f8800 { assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 14 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 14 IRQ_TYPE_EDGE_BOTH>, <&pdc 15 IRQ_TYPE_EDGE_BOTH>, - <&pdc 17 IRQ_TYPE_EDGE_BOTH>; + <&pdc 17 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "hs_phy_irq", "dp_hs_phy_irq", "dm_hs_phy_irq", From 016853056885c6fbf80a42d6035cfe17985adc28 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Dec 2023 18:34:00 +0100 Subject: [PATCH 038/187] arm64: dts: qcom: sdm845: fix USB DP/DM HS PHY interrupts commit 204f9ed4bad6293933179517624143b8f412347c upstream. The USB DP/DM HS PHY interrupts need to be provided by the PDC interrupt controller in order to be able to wake the system up from low-power states and to be able to detect disconnect events, which requires triggering on falling edges. A recent commit updated the trigger type but failed to change the interrupt provider as required. This leads to the current Linux driver failing to probe instead of printing an error during suspend and USB wakeup not working as intended. Fixes: 84ad9ac8d9ca ("arm64: dts: qcom: sdm845: fix USB wakeup interrupt types") Fixes: ca4db2b538a1 ("arm64: dts: qcom: sdm845: Add USB-related nodes") Cc: stable@vger.kernel.org # 4.20 Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231213173403.29544-3-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 3c50580eeb90..4d5905ef0b41 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4048,10 +4048,10 @@ usb_1: usb@a6f8800 { <&gcc GCC_USB30_PRIM_MASTER_CLK>; assigned-clock-rates = <19200000>, <150000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc_intc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -4099,10 +4099,10 @@ usb_2: usb@a8f8800 { <&gcc GCC_USB30_SEC_MASTER_CLK>; assigned-clock-rates = <19200000>, <150000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 10 IRQ_TYPE_EDGE_BOTH>, + <&pdc_intc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; From 2647770eac0972acf81b21089f676a137662dcea Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Dec 2023 18:34:02 +0100 Subject: [PATCH 039/187] arm64: dts: qcom: sm8150: fix USB DP/DM HS PHY interrupts commit 134de5e831775e8b178db9b131c1d3769a766982 upstream. The USB DP/DM HS PHY interrupts need to be provided by the PDC interrupt controller in order to be able to wake the system up from low-power states and to be able to detect disconnect events, which requires triggering on falling edges. A recent commit updated the trigger type but failed to change the interrupt provider as required. This leads to the current Linux driver failing to probe instead of printing an error during suspend and USB wakeup not working as intended. Fixes: 54524b6987d1 ("arm64: dts: qcom: sm8150: fix USB wakeup interrupt types") Fixes: 0c9dde0d2015 ("arm64: dts: qcom: sm8150: Add secondary USB and PHY nodes") Fixes: b33d2868e8d3 ("arm64: dts: qcom: sm8150: Add USB and PHY device nodes") Cc: stable@vger.kernel.org # 5.10 Cc: Jack Pham Cc: Jonathan Marek Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231213173403.29544-5-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index b7e8663f6b58..8efd0e227d78 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3628,10 +3628,10 @@ usb_1: usb@a6f8800 { <&gcc GCC_USB30_PRIM_MASTER_CLK>; assigned-clock-rates = <19200000>, <200000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 8 IRQ_TYPE_EDGE_BOTH>, + <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; @@ -3677,10 +3677,10 @@ usb_2: usb@a8f8800 { <&gcc GCC_USB30_SEC_MASTER_CLK>; assigned-clock-rates = <19200000>, <200000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 10 IRQ_TYPE_EDGE_BOTH>, + <&pdc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; From c014490c0b22371349a7c4aa00a107c2d23268fc Mon Sep 17 00:00:00 2001 From: Alfred Piccioni Date: Tue, 19 Dec 2023 10:09:09 +0100 Subject: [PATCH 040/187] lsm: new security_file_ioctl_compat() hook commit f1bb47a31dff6d4b34fb14e99850860ee74bb003 upstream. Some ioctl commands do not require ioctl permission, but are routed to other permissions such as FILE_GETATTR or FILE_SETATTR. This routing is done by comparing the ioctl cmd to a set of 64-bit flags (FS_IOC_*). However, if a 32-bit process is running on a 64-bit kernel, it emits 32-bit flags (FS_IOC32_*) for certain ioctl operations. These flags are being checked erroneously, which leads to these ioctl operations being routed to the ioctl permission, rather than the correct file permissions. This was also noted in a RED-PEN finding from a while back - "/* RED-PEN how should LSM module know it's handling 32bit? */". This patch introduces a new hook, security_file_ioctl_compat(), that is called from the compat ioctl syscall. All current LSMs have been changed to support this hook. Reviewing the three places where we are currently using security_file_ioctl(), it appears that only SELinux needs a dedicated compat change; TOMOYO and SMACK appear to be functional without any change. Cc: stable@vger.kernel.org Fixes: 0b24dcb7f2f7 ("Revert "selinux: simplify ioctl checking"") Signed-off-by: Alfred Piccioni Reviewed-by: Stephen Smalley [PM: subject tweak, line length fixes, and alignment corrections] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- fs/ioctl.c | 3 +-- include/linux/lsm_hook_defs.h | 2 ++ include/linux/security.h | 9 +++++++++ security/security.c | 18 ++++++++++++++++++ security/selinux/hooks.c | 28 ++++++++++++++++++++++++++++ security/smack/smack_lsm.c | 1 + security/tomoyo/tomoyo.c | 1 + 7 files changed, 60 insertions(+), 2 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 80ac36aea913..2bd05509295a 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -916,8 +916,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, if (!f.file) return -EBADF; - /* RED-PEN how should LSM module know it's handling 32bit? */ - error = security_file_ioctl(f.file, cmd, arg); + error = security_file_ioctl_compat(f.file, cmd, arg); if (error) goto out; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 02b19c508b78..6239a378c0ea 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -165,6 +165,8 @@ LSM_HOOK(int, 0, file_alloc_security, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) +LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd, + unsigned long arg) LSM_HOOK(int, 0, mmap_addr, unsigned long addr) LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) diff --git a/include/linux/security.h b/include/linux/security.h index a6c97cc57caa..2772f6375f14 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -385,6 +385,8 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int security_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg); int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags); int security_mmap_addr(unsigned long addr); @@ -969,6 +971,13 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } +static inline int security_file_ioctl_compat(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + return 0; +} + static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { diff --git a/security/security.c b/security/security.c index 5fa286ae9908..fc15b963e102 100644 --- a/security/security.c +++ b/security/security.c @@ -1569,6 +1569,24 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL_GPL(security_file_ioctl); +/** + * security_file_ioctl_compat() - Check if an ioctl is allowed in compat mode + * @file: associated file + * @cmd: ioctl cmd + * @arg: ioctl arguments + * + * Compat version of security_file_ioctl() that correctly handles 32-bit + * processes running on 64-bit kernels. + * + * Return: Returns 0 if permission is granted. + */ +int security_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return call_int_hook(file_ioctl_compat, 0, file, cmd, arg); +} +EXPORT_SYMBOL_GPL(security_file_ioctl_compat); + static inline unsigned long mmap_prot(struct file *file, unsigned long prot) { /* diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d45e9fa74e62..78f3da39b031 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3718,6 +3718,33 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, return error; } +static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg) +{ + /* + * If we are in a 64-bit kernel running 32-bit userspace, we need to + * make sure we don't compare 32-bit flags to 64-bit flags. + */ + switch (cmd) { + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; + break; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; + break; + case FS_IOC32_GETVERSION: + cmd = FS_IOC_GETVERSION; + break; + case FS_IOC32_SETVERSION: + cmd = FS_IOC_SETVERSION; + break; + default: + break; + } + + return selinux_file_ioctl(file, cmd, arg); +} + static int default_noexec __ro_after_init; static int file_map_prot_check(struct file *file, unsigned long prot, int shared) @@ -7135,6 +7162,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(file_permission, selinux_file_permission), LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security), LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat), LSM_HOOK_INIT(mmap_file, selinux_mmap_file), LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr), LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index cd6a03e945eb..fbadc61feedd 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4905,6 +4905,7 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security), LSM_HOOK_INIT(file_ioctl, smack_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, smack_file_ioctl), LSM_HOOK_INIT(file_lock, smack_file_lock), LSM_HOOK_INIT(file_fcntl, smack_file_fcntl), LSM_HOOK_INIT(mmap_file, smack_mmap_file), diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 71e82d855ebf..fb599401bc7d 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -555,6 +555,7 @@ static struct security_hook_list tomoyo_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(path_rename, tomoyo_path_rename), LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr), LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl), + LSM_HOOK_INIT(file_ioctl_compat, tomoyo_file_ioctl), LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), LSM_HOOK_INIT(path_chown, tomoyo_path_chown), LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), From efe3ec706618e781ec7e53420c021dd55cdef0a0 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 1 Jan 2024 00:59:59 +0100 Subject: [PATCH 041/187] docs: kernel_abi.py: fix command injection commit 3231dd5862779c2e15633c96133a53205ad660ce upstream. The kernel-abi directive passes its argument straight to the shell. This is unfortunate and unnecessary. Let's always use paths relative to $srctree/Documentation/ and use subprocess.check_call() instead of subprocess.Popen(shell=True). This also makes the code shorter. Link: https://fosstodon.org/@jani/111676532203641247 Reported-by: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20231231235959.3342928-2-vegard.nossum@oracle.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/abi-obsolete.rst | 2 +- Documentation/admin-guide/abi-removed.rst | 2 +- Documentation/admin-guide/abi-stable.rst | 2 +- Documentation/admin-guide/abi-testing.rst | 2 +- Documentation/sphinx/kernel_abi.py | 56 ++++------------------ 5 files changed, 14 insertions(+), 50 deletions(-) diff --git a/Documentation/admin-guide/abi-obsolete.rst b/Documentation/admin-guide/abi-obsolete.rst index d095867899c5..594e697aa1b2 100644 --- a/Documentation/admin-guide/abi-obsolete.rst +++ b/Documentation/admin-guide/abi-obsolete.rst @@ -7,5 +7,5 @@ marked to be removed at some later point in time. The description of the interface will document the reason why it is obsolete and when it can be expected to be removed. -.. kernel-abi:: $srctree/Documentation/ABI/obsolete +.. kernel-abi:: ABI/obsolete :rst: diff --git a/Documentation/admin-guide/abi-removed.rst b/Documentation/admin-guide/abi-removed.rst index f7e9e43023c1..f9e000c81828 100644 --- a/Documentation/admin-guide/abi-removed.rst +++ b/Documentation/admin-guide/abi-removed.rst @@ -1,5 +1,5 @@ ABI removed symbols =================== -.. kernel-abi:: $srctree/Documentation/ABI/removed +.. kernel-abi:: ABI/removed :rst: diff --git a/Documentation/admin-guide/abi-stable.rst b/Documentation/admin-guide/abi-stable.rst index 70490736e0d3..fc3361d847b1 100644 --- a/Documentation/admin-guide/abi-stable.rst +++ b/Documentation/admin-guide/abi-stable.rst @@ -10,5 +10,5 @@ for at least 2 years. Most interfaces (like syscalls) are expected to never change and always be available. -.. kernel-abi:: $srctree/Documentation/ABI/stable +.. kernel-abi:: ABI/stable :rst: diff --git a/Documentation/admin-guide/abi-testing.rst b/Documentation/admin-guide/abi-testing.rst index b205b16a72d0..19767926b344 100644 --- a/Documentation/admin-guide/abi-testing.rst +++ b/Documentation/admin-guide/abi-testing.rst @@ -16,5 +16,5 @@ Programs that use these interfaces are strongly encouraged to add their name to the description of these interfaces, so that the kernel developers can easily notify them if any changes occur. -.. kernel-abi:: $srctree/Documentation/ABI/testing +.. kernel-abi:: ABI/testing :rst: diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index b5feb5b1d905..6d8a637ad566 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -39,8 +39,6 @@ import sys import re import kernellog -from os import path - from docutils import nodes, statemachine from docutils.statemachine import ViewList from docutils.parsers.rst import directives, Directive @@ -73,60 +71,26 @@ class KernelCmd(Directive): } def run(self): - doc = self.state.document if not doc.settings.file_insertion_enabled: raise self.warning("docutils: file insertion disabled") - env = doc.settings.env - cwd = path.dirname(doc.current_source) - cmd = "get_abi.pl rest --enable-lineno --dir " - cmd += self.arguments[0] + srctree = os.path.abspath(os.environ["srctree"]) + + args = [ + os.path.join(srctree, 'scripts/get_abi.pl'), + 'rest', + '--enable-lineno', + '--dir', os.path.join(srctree, 'Documentation', self.arguments[0]), + ] if 'rst' in self.options: - cmd += " --rst-source" + args.append('--rst-source') - srctree = path.abspath(os.environ["srctree"]) - - fname = cmd - - # extend PATH with $(srctree)/scripts - path_env = os.pathsep.join([ - srctree + os.sep + "scripts", - os.environ["PATH"] - ]) - shell_env = os.environ.copy() - shell_env["PATH"] = path_env - shell_env["srctree"] = srctree - - lines = self.runCmd(cmd, shell=True, cwd=cwd, env=shell_env) + lines = subprocess.check_output(args, cwd=os.path.dirname(doc.current_source)).decode('utf-8') nodeList = self.nestedParse(lines, self.arguments[0]) return nodeList - def runCmd(self, cmd, **kwargs): - u"""Run command ``cmd`` and return its stdout as unicode.""" - - try: - proc = subprocess.Popen( - cmd - , stdout = subprocess.PIPE - , stderr = subprocess.PIPE - , **kwargs - ) - out, err = proc.communicate() - - out, err = codecs.decode(out, 'utf-8'), codecs.decode(err, 'utf-8') - - if proc.returncode != 0: - raise self.severe( - u"command '%s' failed with return code %d" - % (cmd, proc.returncode) - ) - except OSError as exc: - raise self.severe(u"problems with '%s' directive: %s." - % (self.name, ErrorString(exc))) - return out - def nestedParse(self, lines, fname): env = self.state.document.settings.env content = ViewList() From c160f2ac85e06582f385808b900fc12e2858ceb0 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 1 Jan 2024 00:59:58 +0100 Subject: [PATCH 042/187] scripts/get_abi: fix source path leak commit 5889d6ede53bc17252f79c142387e007224aa554 upstream. The code currently leaks the absolute path of the ABI files into the rendered documentation. There exists code to prevent this, but it is not effective when an absolute path is passed, which it is when $srctree is used. I consider this to be a minimal, stop-gap fix; a better fix would strip off the actual prefix instead of hacking it off with a regex. Link: https://mastodon.social/@vegard/111677490643495163 Cc: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20231231235959.3342928-1-vegard.nossum@oracle.com Signed-off-by: Greg Kroah-Hartman --- scripts/get_abi.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl index 0ffd5531242a..408bfd0216da 100755 --- a/scripts/get_abi.pl +++ b/scripts/get_abi.pl @@ -98,7 +98,7 @@ sub parse_abi { $name =~ s,.*/,,; my $fn = $file; - $fn =~ s,Documentation/ABI/,,; + $fn =~ s,.*Documentation/ABI/,,; my $nametag = "File $fn"; $data{$nametag}->{what} = "File $name"; From d78fac87c66d64ac759b51adb073c3828340c74c Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Thu, 23 Nov 2023 23:32:05 +0100 Subject: [PATCH 043/187] media: videobuf2-dma-sg: fix vmap callback commit 608ca5a60ee47b48fec210aeb7a795a64eb5dcee upstream. For dmabuf import users to be able to use the vaddr from another videobuf2-dma-sg source, the exporter needs to set a proper vaddr on vb2_dma_sg_dmabuf_ops_vmap callback. This patch adds vmap on map if buf->vaddr was not set. Cc: stable@kernel.org Fixes: 7938f4218168 ("dma-buf-map: Rename to iosys-map") Signed-off-by: Michael Grzeschik Acked-by: Tomasz Figa Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/common/videobuf2/videobuf2-dma-sg.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c index fa69158a65b1..cb1e4bc69dba 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c @@ -494,9 +494,15 @@ vb2_dma_sg_dmabuf_ops_end_cpu_access(struct dma_buf *dbuf, static int vb2_dma_sg_dmabuf_ops_vmap(struct dma_buf *dbuf, struct iosys_map *map) { - struct vb2_dma_sg_buf *buf = dbuf->priv; + struct vb2_dma_sg_buf *buf; + void *vaddr; - iosys_map_set_vaddr(map, buf->vaddr); + buf = dbuf->priv; + vaddr = vb2_dma_sg_vaddr(buf->vb, buf); + if (!vaddr) + return -EINVAL; + + iosys_map_set_vaddr(map, vaddr); return 0; } From c4edcd134bb72b3b0acc884612d624e48c9d057f Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 29 Nov 2023 11:25:35 +0200 Subject: [PATCH 044/187] mmc: core: Use mrq.sbc in close-ended ffu commit 4d0c8d0aef6355660b6775d57ccd5d4ea2e15802 upstream. Field Firmware Update (ffu) may use close-ended or open ended sequence. Each such sequence is comprised of a write commands enclosed between 2 switch commands - to and from ffu mode. So for the close-ended case, it will be: cmd6->cmd23-cmd25-cmd6. Some host controllers however, get confused when multi-block rw is sent without sbc, and may generate auto-cmd12 which breaks the ffu sequence. I encountered this issue while testing fwupd (github.com/fwupd/fwupd) on HP Chromebook x2, a qualcomm based QC-7c, code name - strongbad. Instead of a quirk, or hooking the request function of the msm ops, it would be better to fix the ioctl handling and make it use mrq.sbc instead of issuing SET_BLOCK_COUNT separately. Signed-off-by: Avri Altman Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231129092535.3278-1-avri.altman@wdc.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 46 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index e9ce53d200bc..ea60efaecb0d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -402,6 +402,10 @@ struct mmc_blk_ioc_data { struct mmc_ioc_cmd ic; unsigned char *buf; u64 buf_bytes; + unsigned int flags; +#define MMC_BLK_IOC_DROP BIT(0) /* drop this mrq */ +#define MMC_BLK_IOC_SBC BIT(1) /* use mrq.sbc */ + struct mmc_rpmb_data *rpmb; }; @@ -467,7 +471,7 @@ static int mmc_blk_ioctl_copy_to_user(struct mmc_ioc_cmd __user *ic_ptr, } static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, - struct mmc_blk_ioc_data *idata) + struct mmc_blk_ioc_data **idatas, int i) { struct mmc_command cmd = {}, sbc = {}; struct mmc_data data = {}; @@ -477,10 +481,18 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, unsigned int busy_timeout_ms; int err; unsigned int target_part; + struct mmc_blk_ioc_data *idata = idatas[i]; + struct mmc_blk_ioc_data *prev_idata = NULL; if (!card || !md || !idata) return -EINVAL; + if (idata->flags & MMC_BLK_IOC_DROP) + return 0; + + if (idata->flags & MMC_BLK_IOC_SBC) + prev_idata = idatas[i - 1]; + /* * The RPMB accesses comes in from the character device, so we * need to target these explicitly. Else we just target the @@ -547,7 +559,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, return err; } - if (idata->rpmb) { + if (idata->rpmb || prev_idata) { sbc.opcode = MMC_SET_BLOCK_COUNT; /* * We don't do any blockcount validation because the max size @@ -555,6 +567,8 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, * 'Reliable Write' bit here. */ sbc.arg = data.blocks | (idata->ic.write_flag & BIT(31)); + if (prev_idata) + sbc.arg = prev_idata->ic.arg; sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; mrq.sbc = &sbc; } @@ -572,6 +586,15 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, mmc_wait_for_req(card->host, &mrq); memcpy(&idata->ic.response, cmd.resp, sizeof(cmd.resp)); + if (prev_idata) { + memcpy(&prev_idata->ic.response, sbc.resp, sizeof(sbc.resp)); + if (sbc.error) { + dev_err(mmc_dev(card->host), "%s: sbc error %d\n", + __func__, sbc.error); + return sbc.error; + } + } + if (cmd.error) { dev_err(mmc_dev(card->host), "%s: cmd error %d\n", __func__, cmd.error); @@ -1057,6 +1080,20 @@ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type) md->reset_done &= ~type; } +static void mmc_blk_check_sbc(struct mmc_queue_req *mq_rq) +{ + struct mmc_blk_ioc_data **idata = mq_rq->drv_op_data; + int i; + + for (i = 1; i < mq_rq->ioc_count; i++) { + if (idata[i - 1]->ic.opcode == MMC_SET_BLOCK_COUNT && + mmc_op_multi(idata[i]->ic.opcode)) { + idata[i - 1]->flags |= MMC_BLK_IOC_DROP; + idata[i]->flags |= MMC_BLK_IOC_SBC; + } + } +} + /* * The non-block commands come back from the block layer after it queued it and * processed it with all other requests and then they get issued in this @@ -1084,11 +1121,14 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) if (ret) break; } + + mmc_blk_check_sbc(mq_rq); + fallthrough; case MMC_DRV_OP_IOCTL_RPMB: idata = mq_rq->drv_op_data; for (i = 0, ret = 0; i < mq_rq->ioc_count; i++) { - ret = __mmc_blk_ioctl_cmd(card, md, idata[i]); + ret = __mmc_blk_ioctl_cmd(card, md, idata, i); if (ret) break; } From a33fbb8b6d45a618e84685abd4b30ff2a553abee Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 8 Dec 2023 00:19:01 +0200 Subject: [PATCH 045/187] mmc: mmc_spi: remove custom DMA mapped buffers commit 84a6be7db9050dd2601c9870f65eab9a665d2d5d upstream. There is no need to duplicate what SPI core or individual controller drivers already do, i.e. mapping the buffers for DMA capable transfers. Note, that the code, besides its redundancy, was buggy: strictly speaking there is no guarantee, while it's true for those which can use this code (see below), that the SPI host controller _is_ the device which does DMA. Also see the Link tags below. Additional notes. Currently only two SPI host controller drivers may use premapped (by the user) DMA buffers: - drivers/spi/spi-au1550.c - drivers/spi/spi-fsl-spi.c Both of them have DMA mapping support code. I don't expect that SPI host controller code is worse than what has been done in mmc_spi. Hence I do not expect any regressions here. Otherwise, I'm pretty much sure these regressions have to be fixed in the respective drivers, and not here. That said, remove all related pieces of DMA mapping code from mmc_spi. Link: https://lore.kernel.org/linux-mmc/c73b9ba9-1699-2aff-e2fd-b4b4f292a3ca@raspberrypi.org/ Link: https://stackoverflow.com/questions/67620728/mmc-spi-issue-not-able-to-setup-mmc-sd-card-in-linux Signed-off-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231207221901.3259962-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mmc_spi.c | 186 +------------------------------------ 1 file changed, 5 insertions(+), 181 deletions(-) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index cc333ad67cac..2a99ffb61f8c 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -119,19 +119,14 @@ struct mmc_spi_host { struct spi_transfer status; struct spi_message readback; - /* underlying DMA-aware controller, or null */ - struct device *dma_dev; - /* buffer used for commands and for message "overhead" */ struct scratch *data; - dma_addr_t data_dma; /* Specs say to write ones most of the time, even when the card * has no need to read its input data; and many cards won't care. * This is our source of those ones. */ void *ones; - dma_addr_t ones_dma; }; @@ -147,11 +142,8 @@ static inline int mmc_cs_off(struct mmc_spi_host *host) return spi_setup(host->spi); } -static int -mmc_spi_readbytes(struct mmc_spi_host *host, unsigned len) +static int mmc_spi_readbytes(struct mmc_spi_host *host, unsigned int len) { - int status; - if (len > sizeof(*host->data)) { WARN_ON(1); return -EIO; @@ -159,19 +151,7 @@ mmc_spi_readbytes(struct mmc_spi_host *host, unsigned len) host->status.len = len; - if (host->dma_dev) - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_FROM_DEVICE); - - status = spi_sync_locked(host->spi, &host->readback); - - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_FROM_DEVICE); - - return status; + return spi_sync_locked(host->spi, &host->readback); } static int mmc_spi_skip(struct mmc_spi_host *host, unsigned long timeout, @@ -506,23 +486,11 @@ mmc_spi_command_send(struct mmc_spi_host *host, t = &host->t; memset(t, 0, sizeof(*t)); t->tx_buf = t->rx_buf = data->status; - t->tx_dma = t->rx_dma = host->data_dma; t->len = cp - data->status; t->cs_change = 1; spi_message_add_tail(t, &host->m); - if (host->dma_dev) { - host->m.is_dma_mapped = 1; - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); - } status = spi_sync_locked(host->spi, &host->m); - - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); if (status < 0) { dev_dbg(&host->spi->dev, " ... write returned %d\n", status); cmd->error = status; @@ -540,9 +508,6 @@ mmc_spi_command_send(struct mmc_spi_host *host, * We always provide TX data for data and CRC. The MMC/SD protocol * requires us to write ones; but Linux defaults to writing zeroes; * so we explicitly initialize it to all ones on RX paths. - * - * We also handle DMA mapping, so the underlying SPI controller does - * not need to (re)do it for each message. */ static void mmc_spi_setup_data_message( @@ -552,11 +517,8 @@ mmc_spi_setup_data_message( { struct spi_transfer *t; struct scratch *scratch = host->data; - dma_addr_t dma = host->data_dma; spi_message_init(&host->m); - if (dma) - host->m.is_dma_mapped = 1; /* for reads, readblock() skips 0xff bytes before finding * the token; for writes, this transfer issues that token. @@ -570,8 +532,6 @@ mmc_spi_setup_data_message( else scratch->data_token = SPI_TOKEN_SINGLE; t->tx_buf = &scratch->data_token; - if (dma) - t->tx_dma = dma + offsetof(struct scratch, data_token); spi_message_add_tail(t, &host->m); } @@ -581,7 +541,6 @@ mmc_spi_setup_data_message( t = &host->t; memset(t, 0, sizeof(*t)); t->tx_buf = host->ones; - t->tx_dma = host->ones_dma; /* length and actual buffer info are written later */ spi_message_add_tail(t, &host->m); @@ -591,14 +550,9 @@ mmc_spi_setup_data_message( if (direction == DMA_TO_DEVICE) { /* the actual CRC may get written later */ t->tx_buf = &scratch->crc_val; - if (dma) - t->tx_dma = dma + offsetof(struct scratch, crc_val); } else { t->tx_buf = host->ones; - t->tx_dma = host->ones_dma; t->rx_buf = &scratch->crc_val; - if (dma) - t->rx_dma = dma + offsetof(struct scratch, crc_val); } spi_message_add_tail(t, &host->m); @@ -621,10 +575,7 @@ mmc_spi_setup_data_message( memset(t, 0, sizeof(*t)); t->len = (direction == DMA_TO_DEVICE) ? sizeof(scratch->status) : 1; t->tx_buf = host->ones; - t->tx_dma = host->ones_dma; t->rx_buf = scratch->status; - if (dma) - t->rx_dma = dma + offsetof(struct scratch, status); t->cs_change = 1; spi_message_add_tail(t, &host->m); } @@ -653,23 +604,13 @@ mmc_spi_writeblock(struct mmc_spi_host *host, struct spi_transfer *t, if (host->mmc->use_spi_crc) scratch->crc_val = cpu_to_be16(crc_itu_t(0, t->tx_buf, t->len)); - if (host->dma_dev) - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); status = spi_sync_locked(spi, &host->m); - if (status != 0) { dev_dbg(&spi->dev, "write error (%d)\n", status); return status; } - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - /* * Get the transmission data-response reply. It must follow * immediately after the data block we transferred. This reply @@ -718,8 +659,6 @@ mmc_spi_writeblock(struct mmc_spi_host *host, struct spi_transfer *t, } t->tx_buf += t->len; - if (host->dma_dev) - t->tx_dma += t->len; /* Return when not busy. If we didn't collect that status yet, * we'll need some more I/O. @@ -783,30 +722,12 @@ mmc_spi_readblock(struct mmc_spi_host *host, struct spi_transfer *t, } leftover = status << 1; - if (host->dma_dev) { - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - dma_sync_single_for_device(host->dma_dev, - t->rx_dma, t->len, - DMA_FROM_DEVICE); - } - status = spi_sync_locked(spi, &host->m); if (status < 0) { dev_dbg(&spi->dev, "read error %d\n", status); return status; } - if (host->dma_dev) { - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - dma_sync_single_for_cpu(host->dma_dev, - t->rx_dma, t->len, - DMA_FROM_DEVICE); - } - if (bitshift) { /* Walk through the data and the crc and do * all the magic to get byte-aligned data. @@ -841,8 +762,6 @@ mmc_spi_readblock(struct mmc_spi_host *host, struct spi_transfer *t, } t->rx_buf += t->len; - if (host->dma_dev) - t->rx_dma += t->len; return 0; } @@ -857,7 +776,6 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, struct mmc_data *data, u32 blk_size) { struct spi_device *spi = host->spi; - struct device *dma_dev = host->dma_dev; struct spi_transfer *t; enum dma_data_direction direction = mmc_get_dma_dir(data); struct scatterlist *sg; @@ -884,31 +802,8 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, */ for_each_sg(data->sg, sg, data->sg_len, n_sg) { int status = 0; - dma_addr_t dma_addr = 0; void *kmap_addr; unsigned length = sg->length; - enum dma_data_direction dir = direction; - - /* set up dma mapping for controller drivers that might - * use DMA ... though they may fall back to PIO - */ - if (dma_dev) { - /* never invalidate whole *shared* pages ... */ - if ((sg->offset != 0 || length != PAGE_SIZE) - && dir == DMA_FROM_DEVICE) - dir = DMA_BIDIRECTIONAL; - - dma_addr = dma_map_page(dma_dev, sg_page(sg), 0, - PAGE_SIZE, dir); - if (dma_mapping_error(dma_dev, dma_addr)) { - data->error = -EFAULT; - break; - } - if (direction == DMA_TO_DEVICE) - t->tx_dma = dma_addr + sg->offset; - else - t->rx_dma = dma_addr + sg->offset; - } /* allow pio too; we don't allow highmem */ kmap_addr = kmap(sg_page(sg)); @@ -941,8 +836,6 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, if (direction == DMA_FROM_DEVICE) flush_dcache_page(sg_page(sg)); kunmap(sg_page(sg)); - if (dma_dev) - dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir); if (status < 0) { data->error = status; @@ -977,21 +870,9 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, scratch->status[0] = SPI_TOKEN_STOP_TRAN; host->early_status.tx_buf = host->early_status.rx_buf; - host->early_status.tx_dma = host->early_status.rx_dma; host->early_status.len = statlen; - if (host->dma_dev) - dma_sync_single_for_device(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - tmp = spi_sync_locked(spi, &host->m); - - if (host->dma_dev) - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*scratch), - DMA_BIDIRECTIONAL); - if (tmp < 0) { if (!data->error) data->error = tmp; @@ -1265,52 +1146,6 @@ mmc_spi_detect_irq(int irq, void *mmc) return IRQ_HANDLED; } -#ifdef CONFIG_HAS_DMA -static int mmc_spi_dma_alloc(struct mmc_spi_host *host) -{ - struct spi_device *spi = host->spi; - struct device *dev; - - if (!spi->master->dev.parent->dma_mask) - return 0; - - dev = spi->master->dev.parent; - - host->ones_dma = dma_map_single(dev, host->ones, MMC_SPI_BLOCKSIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, host->ones_dma)) - return -ENOMEM; - - host->data_dma = dma_map_single(dev, host->data, sizeof(*host->data), - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, host->data_dma)) { - dma_unmap_single(dev, host->ones_dma, MMC_SPI_BLOCKSIZE, - DMA_TO_DEVICE); - return -ENOMEM; - } - - dma_sync_single_for_cpu(dev, host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); - - host->dma_dev = dev; - return 0; -} - -static void mmc_spi_dma_free(struct mmc_spi_host *host) -{ - if (!host->dma_dev) - return; - - dma_unmap_single(host->dma_dev, host->ones_dma, MMC_SPI_BLOCKSIZE, - DMA_TO_DEVICE); - dma_unmap_single(host->dma_dev, host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); -} -#else -static inline int mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; } -static inline void mmc_spi_dma_free(struct mmc_spi_host *host) {} -#endif - static int mmc_spi_probe(struct spi_device *spi) { void *ones; @@ -1402,24 +1237,17 @@ static int mmc_spi_probe(struct spi_device *spi) host->powerup_msecs = 250; } - /* preallocate dma buffers */ + /* Preallocate buffers */ host->data = kmalloc(sizeof(*host->data), GFP_KERNEL); if (!host->data) goto fail_nobuf1; - status = mmc_spi_dma_alloc(host); - if (status) - goto fail_dma; - /* setup message for status/busy readback */ spi_message_init(&host->readback); - host->readback.is_dma_mapped = (host->dma_dev != NULL); spi_message_add_tail(&host->status, &host->readback); host->status.tx_buf = host->ones; - host->status.tx_dma = host->ones_dma; host->status.rx_buf = &host->data->status; - host->status.rx_dma = host->data_dma + offsetof(struct scratch, status); host->status.cs_change = 1; /* register card detect irq */ @@ -1464,9 +1292,8 @@ static int mmc_spi_probe(struct spi_device *spi) if (!status) has_ro = true; - dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n", + dev_info(&spi->dev, "SD/MMC host %s%s%s%s\n", dev_name(&mmc->class_dev), - host->dma_dev ? "" : ", no DMA", has_ro ? "" : ", no WP", (host->pdata && host->pdata->setpower) ? "" : ", no poweroff", @@ -1477,8 +1304,6 @@ static int mmc_spi_probe(struct spi_device *spi) fail_gpiod_request: mmc_remove_host(mmc); fail_glue_init: - mmc_spi_dma_free(host); -fail_dma: kfree(host->data); fail_nobuf1: mmc_spi_put_pdata(spi); @@ -1500,7 +1325,6 @@ static void mmc_spi_remove(struct spi_device *spi) mmc_remove_host(mmc); - mmc_spi_dma_free(host); kfree(host->data); kfree(host->ones); From 9fec4db7fff54d9b0306a332bab31eac47eeb5f6 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Mon, 6 Nov 2023 15:48:10 +0100 Subject: [PATCH 046/187] media: mtk-jpeg: Fix use after free bug due to error path handling in mtk_jpeg_dec_device_run commit 206c857dd17d4d026de85866f1b5f0969f2a109e upstream. In mtk_jpeg_probe, &jpeg->job_timeout_work is bound with mtk_jpeg_job_timeout_work. In mtk_jpeg_dec_device_run, if error happens in mtk_jpeg_set_dec_dst, it will finally start the worker while mark the job as finished by invoking v4l2_m2m_job_finish. There are two methods to trigger the bug. If we remove the module, it which will call mtk_jpeg_remove to make cleanup. The possible sequence is as follows, which will cause a use-after-free bug. CPU0 CPU1 mtk_jpeg_dec_... | start worker | |mtk_jpeg_job_timeout_work mtk_jpeg_remove | v4l2_m2m_release | kfree(m2m_dev); | | | v4l2_m2m_get_curr_priv | m2m_dev->curr_ctx //use If we close the file descriptor, which will call mtk_jpeg_release, it will have a similar sequence. Fix this bug by starting timeout worker only if started jpegdec worker successfully. Then v4l2_m2m_job_finish will only be called in either mtk_jpeg_job_timeout_work or mtk_jpeg_dec_device_run. Fixes: b2f0d2724ba4 ("[media] vcodec: mediatek: Add Mediatek JPEG Decoder Driver") Signed-off-by: Zheng Wang Signed-off-by: Dmitry Osipenko Cc: stable@vger.kernel.org Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c index 3071b61946c3..f911d3c7dd86 100644 --- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c +++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c @@ -974,13 +974,13 @@ static void mtk_jpeg_dec_device_run(void *priv) if (ret < 0) goto dec_end; - schedule_delayed_work(&jpeg->job_timeout_work, - msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); - mtk_jpeg_set_dec_src(ctx, &src_buf->vb2_buf, &bs); if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, &dst_buf->vb2_buf, &fb)) goto dec_end; + schedule_delayed_work(&jpeg->job_timeout_work, + msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); + spin_lock_irqsave(&jpeg->hw_lock, flags); mtk_jpeg_dec_reset(jpeg->reg_base); mtk_jpeg_dec_set_config(jpeg->reg_base, From 2b1dc0666e7f6c8c794147d28270eec53396887f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 10 Jan 2024 11:29:20 -0600 Subject: [PATCH 047/187] arm64: Rename ARM64_WORKAROUND_2966298 commit 546b7cde9b1dd36089649101b75266564600ffe5 upstream. In preparation to apply ARM64_WORKAROUND_2966298 for multiple errata, rename the kconfig and capability. No functional change. Cc: stable@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20240110-arm-errata-a510-v1-1-d02bc51aeeee@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 4 ++++ arch/arm64/kernel/cpu_errata.c | 4 ++-- arch/arm64/kernel/entry.S | 2 +- arch/arm64/tools/cpucaps | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ea70eb960565..c15f71501c6c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -983,8 +983,12 @@ config ARM64_ERRATUM_2457168 If unsure, say Y. +config ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + bool + config ARM64_ERRATUM_2966298 bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load" + select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD default y help This option adds the workaround for ARM Cortex-A520 erratum 2966298. diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 3f917124684c..61f22e9c92b4 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -723,10 +723,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_clear_bf16_from_user_emulation, }, #endif -#ifdef CONFIG_ARM64_ERRATUM_2966298 +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD { .desc = "ARM erratum 2966298", - .capability = ARM64_WORKAROUND_2966298, + .capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD, /* Cortex-A520 r0p0 - r0p1 */ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), }, diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index de16fa917e1b..62146d48dba7 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -419,7 +419,7 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] .if \el == 0 -alternative_if ARM64_WORKAROUND_2966298 +alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD tlbi vale1, xzr dsb nsh alternative_else_nop_endif diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index e73830d9f136..2dc7ddee5f04 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -71,7 +71,6 @@ WORKAROUND_2064142 WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2658417 -WORKAROUND_2966298 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE @@ -87,3 +86,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP WORKAROUND_QCOM_FALKOR_E1003 WORKAROUND_REPEAT_TLBI WORKAROUND_SPECULATIVE_AT +WORKAROUND_SPECULATIVE_UNPRIV_LOAD From aca1ea92f518b38d0b7651a8f4823df6774e8c70 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 6 Nov 2023 10:23:10 -0600 Subject: [PATCH 048/187] rtc: cmos: Use ACPI alarm for non-Intel x86 systems too commit 3d762e21d56370a43478b55e604b4a83dd85aafc upstream. Intel systems > 2015 have been configured to use ACPI alarm instead of HPET to avoid s2idle issues. Having HPET programmed for wakeup causes problems on AMD systems with s2idle as well. One particular case is that the systemd "SuspendThenHibernate" feature doesn't work properly on the Framework 13" AMD model. Switching to using ACPI alarm fixes the issue. Adjust the quirk to apply to AMD/Hygon systems from 2021 onwards. This matches what has been tested and is specifically to avoid potential risk to older systems. Cc: # 6.1+ Reported-by: Reported-by: Closes: https://github.com/systemd/systemd/issues/24279 Reported-by: Kelvie Wong Closes: https://community.frame.work/t/systemd-suspend-then-hibernate-wakes-up-after-5-minutes/39392 Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20231106162310.85711-1-mario.limonciello@amd.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-cmos.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 00e2ca7374ec..bb7a9bdbfc31 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -818,18 +818,24 @@ static void rtc_wake_off(struct device *dev) } #ifdef CONFIG_X86 -/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ static void use_acpi_alarm_quirks(void) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (dmi_get_bios_year() < 2015) + return; + break; + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: + if (dmi_get_bios_year() < 2021) + return; + break; + default: return; - + } if (!is_hpet_enabled()) return; - if (dmi_get_bios_year() < 2015) - return; - use_acpi_alarm = true; } #else From 911e7206c850622427df55cc1bb9af9f2aa84b3e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 27 Nov 2023 23:36:51 -0600 Subject: [PATCH 049/187] rtc: Adjust failure return code for cmos_set_alarm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1311a8f0d4b23f58bbababa13623aa40b8ad4e0c upstream. When mc146818_avoid_UIP() fails to return a valid value, this is because UIP didn't clear in the timeout period. Adjust the return code in this case to -ETIMEDOUT. Tested-by: Mateusz JoÅ„czyk Reviewed-by: Mateusz JoÅ„czyk Acked-by: Mateusz JoÅ„czyk Cc: Fixes: cdedc45c579f ("rtc: cmos: avoid UIP when reading alarm time") Fixes: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20231128053653.101798-3-mario.limonciello@amd.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-cmos.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index bb7a9bdbfc31..843e7d8b71dd 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -292,7 +292,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) /* This not only a rtc_op, but also called directly */ if (!is_valid_irq(cmos->irq)) - return -EIO; + return -ETIMEDOUT; /* Basic alarms only support hour, minute, and seconds fields. * Some also support day and month, for alarms up to a year in @@ -557,7 +557,7 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) * Use mc146818_avoid_UIP() to avoid this. */ if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p)) - return -EIO; + return -ETIMEDOUT; cmos->alarm_expires = rtc_tm_to_time64(&t->time); From fd1f5396be24143989d7f564445ced2d8f595564 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 27 Nov 2023 23:36:50 -0600 Subject: [PATCH 050/187] rtc: mc146818-lib: Adjust failure return code for mc146818_get_time() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit af838635a3eb9b1bc0d98599c101ebca98f31311 upstream. mc146818_get_time() calls mc146818_avoid_UIP() to avoid fetching the time while RTC update is in progress (UIP). When this fails, the return code is -EIO, but actually there was no IO failure. The reason for the return from mc146818_avoid_UIP() is that the UIP wasn't cleared in the time period. Adjust the return code to -ETIMEDOUT to match the behavior. Tested-by: Mateusz JoÅ„czyk Reviewed-by: Mateusz JoÅ„czyk Acked-by: Mateusz JoÅ„czyk Cc: Fixes: 2a61b0ac5493 ("rtc: mc146818-lib: refactor mc146818_get_time") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20231128053653.101798-2-mario.limonciello@amd.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-mc146818-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index f1c09f1db044..43a28e82674e 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -138,7 +138,7 @@ int mc146818_get_time(struct rtc_time *time) if (!mc146818_avoid_UIP(mc146818_get_time_callback, &p)) { memset(time, 0, sizeof(*time)); - return -EIO; + return -ETIMEDOUT; } if (!(p.ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) From 7971389316e5af7702bbbd25789e642f5b276695 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 27 Nov 2023 23:36:52 -0600 Subject: [PATCH 051/187] rtc: Add support for configuring the UIP timeout for RTC reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 120931db07b49252aba2073096b595482d71857c upstream. The UIP timeout is hardcoded to 10ms for all RTC reads, but in some contexts this might not be enough time. Add a timeout parameter to mc146818_get_time() and mc146818_get_time_callback(). If UIP timeout is configured by caller to be >=100 ms and a call takes this long, log a warning. Make all callers use 10ms to ensure no functional changes. Cc: # 6.1.y Fixes: ec5895c0f2d8 ("rtc: mc146818-lib: extract mc146818_avoid_UIP") Signed-off-by: Mario Limonciello Tested-by: Mateusz JoÅ„czyk Reviewed-by: Mateusz JoÅ„czyk Acked-by: Mateusz JoÅ„czyk Link: https://lore.kernel.org/r/20231128053653.101798-4-mario.limonciello@amd.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/rtc.c | 2 +- arch/x86/kernel/hpet.c | 2 +- arch/x86/kernel/rtc.c | 2 +- drivers/base/power/trace.c | 2 +- drivers/rtc/rtc-cmos.c | 6 +++--- drivers/rtc/rtc-mc146818-lib.c | 37 ++++++++++++++++++++++++++-------- include/linux/mc146818rtc.h | 3 ++- 7 files changed, 38 insertions(+), 16 deletions(-) diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index fb3025396ac9..cfdf90bc8b3f 100644 --- a/arch/alpha/kernel/rtc.c +++ b/arch/alpha/kernel/rtc.c @@ -80,7 +80,7 @@ init_rtc_epoch(void) static int alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) { - int ret = mc146818_get_time(tm); + int ret = mc146818_get_time(tm, 10); if (ret < 0) { dev_err_ratelimited(dev, "unable to read current time\n"); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 71f336425e58..54732da52dc1 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1436,7 +1436,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) memset(&curr_time, 0, sizeof(struct rtc_time)); if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) { - if (unlikely(mc146818_get_time(&curr_time) < 0)) { + if (unlikely(mc146818_get_time(&curr_time, 10) < 0)) { pr_err_ratelimited("unable to read current time from RTC\n"); return IRQ_HANDLED; } diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 349046434513..a1028909502b 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -67,7 +67,7 @@ void mach_get_cmos_time(struct timespec64 *now) return; } - if (mc146818_get_time(&tm)) { + if (mc146818_get_time(&tm, 10)) { pr_err("Unable to read current time from RTC\n"); now->tv_sec = now->tv_nsec = 0; return; diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 72b7a92337b1..c2e925357474 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -120,7 +120,7 @@ static unsigned int read_magic_time(void) struct rtc_time time; unsigned int val; - if (mc146818_get_time(&time) < 0) { + if (mc146818_get_time(&time, 10) < 0) { pr_err("Unable to read current time from RTC\n"); return 0; } diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 843e7d8b71dd..09b269c7f00f 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -231,7 +231,7 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) if (!pm_trace_rtc_valid()) return -EIO; - ret = mc146818_get_time(t); + ret = mc146818_get_time(t, 10); if (ret < 0) { dev_err_ratelimited(dev, "unable to read current time\n"); return ret; @@ -307,7 +307,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) * * Use the mc146818_avoid_UIP() function to avoid this. */ - if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p)) + if (!mc146818_avoid_UIP(cmos_read_alarm_callback, 10, &p)) return -EIO; if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { @@ -556,7 +556,7 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) * * Use mc146818_avoid_UIP() to avoid this. */ - if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p)) + if (!mc146818_avoid_UIP(cmos_set_alarm_callback, 10, &p)) return -ETIMEDOUT; cmos->alarm_expires = rtc_tm_to_time64(&t->time); diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 43a28e82674e..4c17b3cef11e 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -8,26 +8,31 @@ #include #endif +#define UIP_RECHECK_DELAY 100 /* usec */ +#define UIP_RECHECK_DELAY_MS (USEC_PER_MSEC / UIP_RECHECK_DELAY) +#define UIP_RECHECK_LOOPS_MS(x) (x / UIP_RECHECK_DELAY_MS) + /* * Execute a function while the UIP (Update-in-progress) bit of the RTC is - * unset. + * unset. The timeout is configurable by the caller in ms. * * Warning: callback may be executed more then once. */ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param) { int i; unsigned long flags; unsigned char seconds; - for (i = 0; i < 100; i++) { + for (i = 0; UIP_RECHECK_LOOPS_MS(i) < timeout; i++) { spin_lock_irqsave(&rtc_lock, flags); /* * Check whether there is an update in progress during which the * readout is unspecified. The maximum update time is ~2ms. Poll - * every 100 usec for completion. + * for completion. * * Store the second value before checking UIP so a long lasting * NMI which happens to hit after the UIP check cannot make @@ -37,7 +42,7 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); - udelay(100); + udelay(UIP_RECHECK_DELAY); continue; } @@ -56,7 +61,7 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), */ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { spin_unlock_irqrestore(&rtc_lock, flags); - udelay(100); + udelay(UIP_RECHECK_DELAY); continue; } @@ -72,6 +77,10 @@ bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), } spin_unlock_irqrestore(&rtc_lock, flags); + if (UIP_RECHECK_LOOPS_MS(i) >= 100) + pr_warn("Reading current time from RTC took around %li ms\n", + UIP_RECHECK_LOOPS_MS(i)); + return true; } return false; @@ -84,7 +93,7 @@ EXPORT_SYMBOL_GPL(mc146818_avoid_UIP); */ bool mc146818_does_rtc_work(void) { - return mc146818_avoid_UIP(NULL, NULL); + return mc146818_avoid_UIP(NULL, 10, NULL); } EXPORT_SYMBOL_GPL(mc146818_does_rtc_work); @@ -130,13 +139,25 @@ static void mc146818_get_time_callback(unsigned char seconds, void *param_in) p->ctrl = CMOS_READ(RTC_CONTROL); } -int mc146818_get_time(struct rtc_time *time) +/** + * mc146818_get_time - Get the current time from the RTC + * @time: pointer to struct rtc_time to store the current time + * @timeout: timeout value in ms + * + * This function reads the current time from the RTC and stores it in the + * provided struct rtc_time. The timeout parameter specifies the maximum + * time to wait for the RTC to become ready. + * + * Return: 0 on success, -ETIMEDOUT if the RTC did not become ready within + * the specified timeout, or another error code if an error occurred. + */ +int mc146818_get_time(struct rtc_time *time, int timeout) { struct mc146818_get_time_callback_param p = { .time = time }; - if (!mc146818_avoid_UIP(mc146818_get_time_callback, &p)) { + if (!mc146818_avoid_UIP(mc146818_get_time_callback, timeout, &p)) { memset(time, 0, sizeof(*time)); return -ETIMEDOUT; } diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index b0da04fe087b..34dfcc77f505 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -126,10 +126,11 @@ struct cmos_rtc_board_info { #endif /* ARCH_RTC_LOCATION */ bool mc146818_does_rtc_work(void); -int mc146818_get_time(struct rtc_time *time); +int mc146818_get_time(struct rtc_time *time, int timeout); int mc146818_set_time(struct rtc_time *time); bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), + int timeout, void *param); #endif /* _MC146818RTC_H */ From 40c23b5e0756dab27d00e218ff6e041269dc2f37 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 27 Nov 2023 23:36:53 -0600 Subject: [PATCH 052/187] rtc: Extend timeout for waiting for UIP to clear to 1s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cef9ecc8e938dd48a560f7dd9be1246359248d20 upstream. Specs don't say anything about UIP being cleared within 10ms. They only say that UIP won't occur for another 244uS. If a long NMI occurs while UIP is still updating it might not be possible to get valid data in 10ms. This has been observed in the wild that around s2idle some calls can take up to 480ms before UIP is clear. Adjust callers from outside an interrupt context to wait for up to a 1s instead of 10ms. Cc: # 6.1.y Fixes: ec5895c0f2d8 ("rtc: mc146818-lib: extract mc146818_avoid_UIP") Reported-by: Carsten Hatger Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217626 Tested-by: Mateusz JoÅ„czyk Reviewed-by: Mateusz JoÅ„czyk Acked-by: Mateusz JoÅ„czyk Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20231128053653.101798-5-mario.limonciello@amd.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/rtc.c | 2 +- drivers/base/power/trace.c | 2 +- drivers/rtc/rtc-cmos.c | 2 +- drivers/rtc/rtc-mc146818-lib.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index a1028909502b..344c141133e4 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -67,7 +67,7 @@ void mach_get_cmos_time(struct timespec64 *now) return; } - if (mc146818_get_time(&tm, 10)) { + if (mc146818_get_time(&tm, 1000)) { pr_err("Unable to read current time from RTC\n"); now->tv_sec = now->tv_nsec = 0; return; diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index c2e925357474..cd6e559648b2 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -120,7 +120,7 @@ static unsigned int read_magic_time(void) struct rtc_time time; unsigned int val; - if (mc146818_get_time(&time, 10) < 0) { + if (mc146818_get_time(&time, 1000) < 0) { pr_err("Unable to read current time from RTC\n"); return 0; } diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 09b269c7f00f..e0a798923ce0 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -231,7 +231,7 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) if (!pm_trace_rtc_valid()) return -EIO; - ret = mc146818_get_time(t, 10); + ret = mc146818_get_time(t, 1000); if (ret < 0) { dev_err_ratelimited(dev, "unable to read current time\n"); return ret; diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 4c17b3cef11e..651bf3c279c7 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -93,7 +93,7 @@ EXPORT_SYMBOL_GPL(mc146818_avoid_UIP); */ bool mc146818_does_rtc_work(void) { - return mc146818_avoid_UIP(NULL, 10, NULL); + return mc146818_avoid_UIP(NULL, 1000, NULL); } EXPORT_SYMBOL_GPL(mc146818_does_rtc_work); From 13a6ceeb5bf12112a326896a36ed83f1fad7e8ab Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 18 Jan 2024 06:19:57 +1000 Subject: [PATCH 053/187] nouveau/vmm: don't set addr on the fail path to avoid warning commit cacea81390fd8c8c85404e5eb2adeb83d87a912e upstream. nvif_vmm_put gets called if addr is set, but if the allocation fails we don't need to call put, otherwise we get a warning like [523232.435671] ------------[ cut here ]------------ [523232.435674] WARNING: CPU: 8 PID: 1505697 at drivers/gpu/drm/nouveau/nvif/vmm.c:68 nvif_vmm_put+0x72/0x80 [nouveau] [523232.435795] Modules linked in: uinput rfcomm snd_seq_dummy snd_hrtimer nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink qrtr bnep sunrpc binfmt_misc intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common isst_if_common iwlmvm nfit libnvdimm vfat fat x86_pkg_temp_thermal intel_powerclamp mac80211 snd_soc_avs snd_soc_hda_codec coretemp snd_hda_ext_core snd_soc_core snd_hda_codec_realtek kvm_intel snd_hda_codec_hdmi snd_compress snd_hda_codec_generic ac97_bus snd_pcm_dmaengine snd_hda_intel libarc4 snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec kvm iwlwifi snd_hda_core btusb snd_hwdep btrtl snd_seq btintel irqbypass btbcm rapl snd_seq_device eeepc_wmi btmtk intel_cstate iTCO_wdt cfg80211 snd_pcm asus_wmi bluetooth intel_pmc_bxt iTCO_vendor_support snd_timer ledtrig_audio pktcdvd snd mei_me [523232.435828] sparse_keymap intel_uncore i2c_i801 platform_profile wmi_bmof mei pcspkr ioatdma soundcore i2c_smbus rfkill idma64 dca joydev acpi_tad loop zram nouveau drm_ttm_helper ttm video drm_exec drm_gpuvm gpu_sched crct10dif_pclmul i2c_algo_bit nvme crc32_pclmul crc32c_intel drm_display_helper polyval_clmulni nvme_core polyval_generic e1000e mxm_wmi cec ghash_clmulni_intel r8169 sha512_ssse3 nvme_common wmi pinctrl_sunrisepoint uas usb_storage ip6_tables ip_tables fuse [523232.435849] CPU: 8 PID: 1505697 Comm: gnome-shell Tainted: G W 6.6.0-rc7-nvk-uapi+ #12 [523232.435851] Hardware name: System manufacturer System Product Name/ROG STRIX X299-E GAMING II, BIOS 1301 09/24/2021 [523232.435852] RIP: 0010:nvif_vmm_put+0x72/0x80 [nouveau] [523232.435934] Code: 00 00 48 89 e2 be 02 00 00 00 48 c7 04 24 00 00 00 00 48 89 44 24 08 e8 fc bf ff ff 85 c0 75 0a 48 c7 43 08 00 00 00 00 eb b3 <0f> 0b eb f2 e8 f5 c9 b2 e6 0f 1f 44 00 00 90 90 90 90 90 90 90 90 [523232.435936] RSP: 0018:ffffc900077ffbd8 EFLAGS: 00010282 [523232.435937] RAX: 00000000fffffffe RBX: ffffc900077ffc00 RCX: 0000000000000010 [523232.435938] RDX: 0000000000000010 RSI: ffffc900077ffb38 RDI: ffffc900077ffbd8 [523232.435940] RBP: ffff888e1c4f2140 R08: 0000000000000000 R09: 0000000000000000 [523232.435940] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888503811800 [523232.435941] R13: ffffc900077ffca0 R14: ffff888e1c4f2140 R15: ffff88810317e1e0 [523232.435942] FS: 00007f933a769640(0000) GS:ffff88905fa00000(0000) knlGS:0000000000000000 [523232.435943] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [523232.435944] CR2: 00007f930bef7000 CR3: 00000005d0322001 CR4: 00000000003706e0 [523232.435945] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [523232.435946] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [523232.435964] Call Trace: [523232.435965] [523232.435966] ? nvif_vmm_put+0x72/0x80 [nouveau] [523232.436051] ? __warn+0x81/0x130 [523232.436055] ? nvif_vmm_put+0x72/0x80 [nouveau] [523232.436138] ? report_bug+0x171/0x1a0 [523232.436142] ? handle_bug+0x3c/0x80 [523232.436144] ? exc_invalid_op+0x17/0x70 [523232.436145] ? asm_exc_invalid_op+0x1a/0x20 [523232.436149] ? nvif_vmm_put+0x72/0x80 [nouveau] [523232.436230] ? nvif_vmm_put+0x64/0x80 [nouveau] [523232.436342] nouveau_vma_del+0x80/0xd0 [nouveau] [523232.436506] nouveau_vma_new+0x1a0/0x210 [nouveau] [523232.436671] nouveau_gem_object_open+0x1d0/0x1f0 [nouveau] [523232.436835] drm_gem_handle_create_tail+0xd1/0x180 [523232.436840] drm_prime_fd_to_handle_ioctl+0x12e/0x200 [523232.436844] ? __pfx_drm_prime_fd_to_handle_ioctl+0x10/0x10 [523232.436847] drm_ioctl_kernel+0xd3/0x180 [523232.436849] drm_ioctl+0x26d/0x4b0 [523232.436851] ? __pfx_drm_prime_fd_to_handle_ioctl+0x10/0x10 [523232.436855] nouveau_drm_ioctl+0x5a/0xb0 [nouveau] [523232.437032] __x64_sys_ioctl+0x94/0xd0 [523232.437036] do_syscall_64+0x5d/0x90 [523232.437040] ? syscall_exit_to_user_mode+0x2b/0x40 [523232.437044] ? do_syscall_64+0x6c/0x90 [523232.437046] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Reported-by: Faith Ekstrand Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240117213852.295565-1-airlied@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_vmm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 67d6619fcd5e..29b5dedf6db8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -108,6 +108,9 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm, } else { ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0, mem->mem.size, &tmp); + if (ret) + goto done; + vma->addr = tmp.addr; } From 5d01dcda812fa01bc35cc86df048c4c8264e7077 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 22 Dec 2023 16:54:46 +0800 Subject: [PATCH 054/187] ubifs: ubifs_symlink: Fix memleak of inode->i_link in error path commit 1e022216dcd248326a5bb95609d12a6815bca4e2 upstream. For error handling path in ubifs_symlink(), inode will be marked as bad first, then iput() is invoked. If inode->i_link is initialized by fscrypt_encrypt_symlink() in encryption scenario, inode->i_link won't be freed by callchain ubifs_free_inode -> fscrypt_free_inode in error handling path, because make_bad_inode() has changed 'inode->i_mode' as 'S_IFREG'. Following kmemleak is easy to be reproduced by injecting error in ubifs_jnl_update() when doing symlink in encryption scenario: unreferenced object 0xffff888103da3d98 (size 8): comm "ln", pid 1692, jiffies 4294914701 (age 12.045s) backtrace: kmemdup+0x32/0x70 __fscrypt_encrypt_symlink+0xed/0x1c0 ubifs_symlink+0x210/0x300 [ubifs] vfs_symlink+0x216/0x360 do_symlinkat+0x11a/0x190 do_syscall_64+0x3b/0xe0 There are two ways fixing it: 1. Remove make_bad_inode() in error handling path. We can do that because ubifs_evict_inode() will do same processes for good symlink inode and bad symlink inode, for inode->i_nlink checking is before is_bad_inode(). 2. Free inode->i_link before marking inode bad. Method 2 is picked, it has less influence, personally, I think. Cc: stable@vger.kernel.org Fixes: 2c58d548f570 ("fscrypt: cache decrypted symlink target in ->i_link") Signed-off-by: Zhihao Cheng Suggested-by: Eric Biggers Reviewed-by: Eric Biggers Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 66ba57a139d2..4d9782e2fab1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1226,6 +1226,8 @@ static int ubifs_symlink(struct user_namespace *mnt_userns, struct inode *dir, dir_ui->ui_size = dir->i_size; mutex_unlock(&dir_ui->ui_mutex); out_inode: + /* Free inode->i_link before inode is marked as bad. */ + fscrypt_free_inode(inode); make_bad_inode(inode); iput(inode); out_fname: From 367a47ef4cccbb294b159c53b14a5abbd124b642 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 1 Dec 2023 14:59:36 -0500 Subject: [PATCH 055/187] mm/rmap: fix misplaced parenthesis of a likely() commit f67f8d4a8c1e1ebc85a6cbdb9a7266f14863461c upstream. Running my yearly branch profiler to see where likely/unlikely annotation may be added or removed, I discovered this: correct incorrect % Function File Line ------- --------- - -------- ---- ---- 0 457918 100 page_try_dup_anon_rmap rmap.h 264 [..] 458021 0 0 page_try_dup_anon_rmap rmap.h 265 I thought it was interesting that line 264 of rmap.h had a 100% incorrect annotation, but the line directly below it was 100% correct. Looking at the code: if (likely(!is_device_private_page(page) && unlikely(page_needs_cow_for_dma(vma, page)))) It didn't make sense. The "likely()" was around the entire if statement (not just the "!is_device_private_page(page)"), which also included the "unlikely()" portion of that if condition. If the unlikely portion is unlikely to be true, that would make the entire if condition unlikely to be true, so it made no sense at all to say the entire if condition is true. What is more likely to be likely is just the first part of the if statement before the && operation. It's likely to be a misplaced parenthesis. And after making the if condition broken into a likely() && unlikely(), both now appear to be correct! Link: https://lkml.kernel.org/r/20231201145936.5ddfdb50@gandalf.local.home Fixes:fb3d824d1a46c ("mm/rmap: split page_dup_rmap() into page_dup_file_rmap() and page_try_dup_anon_rmap()") Signed-off-by: Steven Rostedt (Google) Acked-by: Vlastimil Babka Cc: David Hildenbrand Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/rmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 2bdba700bc3e..7d1ecd9b4373 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -250,8 +250,8 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound, * guarantee the pinned page won't be randomly replaced in the * future on write faults. */ - if (likely(!is_device_private_page(page) && - unlikely(page_needs_cow_for_dma(vma, page)))) + if (likely(!is_device_private_page(page)) && + unlikely(page_needs_cow_for_dma(vma, page))) return -EBUSY; ClearPageAnonExclusive(page); From 68ed9e33324021e9d6b798e9db00ca3093d2012a Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Fri, 13 Oct 2023 18:34:27 +0530 Subject: [PATCH 056/187] mm/sparsemem: fix race in accessing memory_section->usage commit 5ec8e8ea8b7783fab150cf86404fc38cb4db8800 upstream. The below race is observed on a PFN which falls into the device memory region with the system memory configuration where PFN's are such that [ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL]. Since normal zone start and end pfn contains the device memory PFN's as well, the compaction triggered will try on the device memory PFN's too though they end up in NOP(because pfn_to_online_page() returns NULL for ZONE_DEVICE memory sections). When from other core, the section mappings are being removed for the ZONE_DEVICE region, that the PFN in question belongs to, on which compaction is currently being operated is resulting into the kernel crash with CONFIG_SPASEMEM_VMEMAP enabled. The crash logs can be seen at [1]. compact_zone() memunmap_pages ------------- --------------- __pageblock_pfn_to_page ...... (a)pfn_valid(): valid_section()//return true (b)__remove_pages()-> sparse_remove_section()-> section_deactivate(): [Free the array ms->usage and set ms->usage = NULL] pfn_section_valid() [Access ms->usage which is NULL] NOTE: From the above it can be said that the race is reduced to between the pfn_valid()/pfn_section_valid() and the section deactivate with SPASEMEM_VMEMAP enabled. The commit b943f045a9af("mm/sparse: fix kernel crash with pfn_section_valid check") tried to address the same problem by clearing the SECTION_HAS_MEM_MAP with the expectation of valid_section() returns false thus ms->usage is not accessed. Fix this issue by the below steps: a) Clear SECTION_HAS_MEM_MAP before freeing the ->usage. b) RCU protected read side critical section will either return NULL when SECTION_HAS_MEM_MAP is cleared or can successfully access ->usage. c) Free the ->usage with kfree_rcu() and set ms->usage = NULL. No attempt will be made to access ->usage after this as the SECTION_HAS_MEM_MAP is cleared thus valid_section() return false. Thanks to David/Pavan for their inputs on this patch. [1] https://lore.kernel.org/linux-mm/994410bb-89aa-d987-1f50-f514903c55aa@quicinc.com/ On Snapdragon SoC, with the mentioned memory configuration of PFN's as [ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL], we are able to see bunch of issues daily while testing on a device farm. For this particular issue below is the log. Though the below log is not directly pointing to the pfn_section_valid(){ ms->usage;}, when we loaded this dump on T32 lauterbach tool, it is pointing. [ 540.578056] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 540.578068] Mem abort info: [ 540.578070] ESR = 0x0000000096000005 [ 540.578073] EC = 0x25: DABT (current EL), IL = 32 bits [ 540.578077] SET = 0, FnV = 0 [ 540.578080] EA = 0, S1PTW = 0 [ 540.578082] FSC = 0x05: level 1 translation fault [ 540.578085] Data abort info: [ 540.578086] ISV = 0, ISS = 0x00000005 [ 540.578088] CM = 0, WnR = 0 [ 540.579431] pstate: 82400005 (Nzcv daif +PAN -UAO +TCO -DIT -SSBSBTYPE=--) [ 540.579436] pc : __pageblock_pfn_to_page+0x6c/0x14c [ 540.579454] lr : compact_zone+0x994/0x1058 [ 540.579460] sp : ffffffc03579b510 [ 540.579463] x29: ffffffc03579b510 x28: 0000000000235800 x27:000000000000000c [ 540.579470] x26: 0000000000235c00 x25: 0000000000000068 x24:ffffffc03579b640 [ 540.579477] x23: 0000000000000001 x22: ffffffc03579b660 x21:0000000000000000 [ 540.579483] x20: 0000000000235bff x19: ffffffdebf7e3940 x18:ffffffdebf66d140 [ 540.579489] x17: 00000000739ba063 x16: 00000000739ba063 x15:00000000009f4bff [ 540.579495] x14: 0000008000000000 x13: 0000000000000000 x12:0000000000000001 [ 540.579501] x11: 0000000000000000 x10: 0000000000000000 x9 :ffffff897d2cd440 [ 540.579507] x8 : 0000000000000000 x7 : 0000000000000000 x6 :ffffffc03579b5b4 [ 540.579512] x5 : 0000000000027f25 x4 : ffffffc03579b5b8 x3 :0000000000000001 [ 540.579518] x2 : ffffffdebf7e3940 x1 : 0000000000235c00 x0 :0000000000235800 [ 540.579524] Call trace: [ 540.579527] __pageblock_pfn_to_page+0x6c/0x14c [ 540.579533] compact_zone+0x994/0x1058 [ 540.579536] try_to_compact_pages+0x128/0x378 [ 540.579540] __alloc_pages_direct_compact+0x80/0x2b0 [ 540.579544] __alloc_pages_slowpath+0x5c0/0xe10 [ 540.579547] __alloc_pages+0x250/0x2d0 [ 540.579550] __iommu_dma_alloc_noncontiguous+0x13c/0x3fc [ 540.579561] iommu_dma_alloc+0xa0/0x320 [ 540.579565] dma_alloc_attrs+0xd4/0x108 [quic_charante@quicinc.com: use kfree_rcu() in place of synchronize_rcu(), per David] Link: https://lkml.kernel.org/r/1698403778-20938-1-git-send-email-quic_charante@quicinc.com Link: https://lkml.kernel.org/r/1697202267-23600-1-git-send-email-quic_charante@quicinc.com Fixes: f46edbd1b151 ("mm/sparsemem: add helpers track active portions of a section at boot") Signed-off-by: Charan Teja Kalla Cc: Aneesh Kumar K.V Cc: Dan Williams Cc: David Hildenbrand Cc: Mel Gorman Cc: Oscar Salvador Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 14 +++++++++++--- mm/sparse.c | 17 +++++++++-------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 323ee36df683..638d2124d1cb 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1622,6 +1622,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { + struct rcu_head rcu; #ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif @@ -1815,7 +1816,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); - return test_bit(idx, ms->usage->subsection_map); + return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) @@ -1839,6 +1840,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) static inline int pfn_valid(unsigned long pfn) { struct mem_section *ms; + int ret; /* * Ensure the upper PAGE_SHIFT bits are clear in the @@ -1852,13 +1854,19 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - if (!valid_section(ms)) + rcu_read_lock(); + if (!valid_section(ms)) { + rcu_read_unlock(); return 0; + } /* * Traditionally early sections always returned pfn_valid() for * the entire section-sized span. */ - return early_section(ms) || pfn_section_valid(ms, pfn); + ret = early_section(ms) || pfn_section_valid(ms, pfn); + rcu_read_unlock(); + + return ret; } #endif diff --git a/mm/sparse.c b/mm/sparse.c index e5a8a3a0edd7..05d1e7b6c6db 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -792,6 +792,13 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, if (empty) { unsigned long section_nr = pfn_to_section_nr(pfn); + /* + * Mark the section invalid so that valid_section() + * return false. This prevents code from dereferencing + * ms->usage array. + */ + ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; + /* * When removing an early section, the usage map is kept (as the * usage maps of other sections fall into the same page). It @@ -800,16 +807,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, * was allocated during boot. */ if (!PageReserved(virt_to_page(ms->usage))) { - kfree(ms->usage); - ms->usage = NULL; + kfree_rcu(ms->usage, rcu); + WRITE_ONCE(ms->usage, NULL); } memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); - /* - * Mark the section invalid so that valid_section() - * return false. This prevents code from dereferencing - * ms->usage array. - */ - ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; } /* From 362be9ec328a3904ecedb408c4f28c047833e747 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Nov 2023 20:25:58 -0500 Subject: [PATCH 057/187] rename(): fix the locking of subdirectories commit 22e111ed6c83dcde3037fc81176012721bc34c0b upstream. We should never lock two subdirectories without having taken ->s_vfs_rename_mutex; inode pointer order or not, the "order" proposed in 28eceeda130f "fs: Lock moved directories" is not transitive, with the usual consequences. The rationale for locking renamed subdirectory in all cases was the possibility of race between rename modifying .. in a subdirectory to reflect the new parent and another thread modifying the same subdirectory. For a lot of filesystems that's not a problem, but for some it can lead to trouble (e.g. the case when short directory contents is kept in the inode, but creating a file in it might push it across the size limit and copy its contents into separate data block(s)). However, we need that only in case when the parent does change - otherwise ->rename() doesn't need to do anything with .. entry in the first place. Some instances are lazy and do a tautological update anyway, but it's really not hard to avoid. Amended locking rules for rename(): find the parent(s) of source and target if source and target have the same parent lock the common parent else lock ->s_vfs_rename_mutex lock both parents, in ancestor-first order; if neither is an ancestor of another, lock the parent of source first. find the source and target. if source and target have the same parent if operation is an overwriting rename of a subdirectory lock the target subdirectory else if source is a subdirectory lock the source if target is a subdirectory lock the target lock non-directories involved, in inode pointer order if both source and target are such. That way we are guaranteed that parents are locked (for obvious reasons), that any renamed non-directory is locked (nfsd relies upon that), that any victim is locked (emptiness check needs that, among other things) and subdirectory that changes parent is locked (needed to protect the update of .. entries). We are also guaranteed that any operation locking more than one directory either takes ->s_vfs_rename_mutex or locks a parent followed by its child. Cc: stable@vger.kernel.org Fixes: 28eceeda130f "fs: Lock moved directories" Reviewed-by: Jan Kara Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- .../filesystems/directory-locking.rst | 29 ++++----- Documentation/filesystems/locking.rst | 5 +- Documentation/filesystems/porting.rst | 18 ++++++ fs/namei.c | 60 ++++++++++++------- 4 files changed, 74 insertions(+), 38 deletions(-) diff --git a/Documentation/filesystems/directory-locking.rst b/Documentation/filesystems/directory-locking.rst index dccd61c7c5c3..193c22687851 100644 --- a/Documentation/filesystems/directory-locking.rst +++ b/Documentation/filesystems/directory-locking.rst @@ -22,13 +22,16 @@ exclusive. 3) object removal. Locking rules: caller locks parent, finds victim, locks victim and calls the method. Locks are exclusive. -4) rename() that is _not_ cross-directory. Locking rules: caller locks the -parent and finds source and target. We lock both (provided they exist). If we -need to lock two inodes of different type (dir vs non-dir), we lock directory -first. If we need to lock two inodes of the same type, lock them in inode -pointer order. Then call the method. All locks are exclusive. -NB: we might get away with locking the source (and target in exchange -case) shared. +4) rename() that is _not_ cross-directory. Locking rules: caller locks +the parent and finds source and target. Then we decide which of the +source and target need to be locked. Source needs to be locked if it's a +non-directory; target - if it's a non-directory or about to be removed. +Take the locks that need to be taken, in inode pointer order if need +to take both (that can happen only when both source and target are +non-directories - the source because it wouldn't be locked otherwise +and the target because mixing directory and non-directory is allowed +only with RENAME_EXCHANGE, and that won't be removing the target). +After the locks had been taken, call the method. All locks are exclusive. 5) link creation. Locking rules: @@ -44,20 +47,17 @@ rules: * lock the filesystem * lock parents in "ancestors first" order. If one is not ancestor of - the other, lock them in inode pointer order. + the other, lock the parent of source first. * find source and target. * if old parent is equal to or is a descendent of target fail with -ENOTEMPTY * if new parent is equal to or is a descendent of source fail with -ELOOP - * Lock both the source and the target provided they exist. If we - need to lock two inodes of different type (dir vs non-dir), we lock - the directory first. If we need to lock two inodes of the same type, - lock them in inode pointer order. + * Lock subdirectories involved (source before target). + * Lock non-directories involved, in inode pointer order. * call the method. -All ->i_rwsem are taken exclusive. Again, we might get away with locking -the source (and target in exchange case) shared. +All ->i_rwsem are taken exclusive. The rules above obviously guarantee that all directories that are going to be read, modified or removed by method will be locked by caller. @@ -67,6 +67,7 @@ If no directory is its own ancestor, the scheme above is deadlock-free. Proof: +[XXX: will be updated once we are done massaging the lock_rename()] First of all, at any moment we have a linear ordering of the objects - A < B iff (A is an ancestor of B) or (B is not an ancestor of A and ptr(A) < ptr(B)). diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 8f737e76935c..555215c416d9 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -99,7 +99,7 @@ symlink: exclusive mkdir: exclusive unlink: exclusive (both) rmdir: exclusive (both)(see below) -rename: exclusive (all) (see below) +rename: exclusive (both parents, some children) (see below) readlink: no get_link: no setattr: exclusive @@ -119,6 +119,9 @@ fileattr_set: exclusive Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem exclusive on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. + ->unlink() and ->rename() have ->i_rwsem exclusive on all non-directories + involved. + ->rename() has ->i_rwsem exclusive on any subdirectory that changes parent. See Documentation/filesystems/directory-locking.rst for more detailed discussion of the locking scheme for directory operations. diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index df0dc37e6f58..c6aaf340f179 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -943,3 +943,21 @@ file pointer instead of struct dentry pointer. d_tmpfile() is similarly changed to simplify callers. The passed file is in a non-open state and on success must be opened before returning (e.g. by calling finish_open_simple()). + +--- + +**mandatory** + +If ->rename() update of .. on cross-directory move needs an exclusion with +directory modifications, do *not* lock the subdirectory in question in your +->rename() - it's done by the caller now [that item should've been added in +28eceeda130f "fs: Lock moved directories"]. + +--- + +**mandatory** + +On same-directory ->rename() the (tautological) update of .. is not protected +by any locks; just don't do it if the old parent is the same as the new one. +We really can't lock two subdirectories in same-directory rename - not without +deadlocks. diff --git a/fs/namei.c b/fs/namei.c index b5578f4ce5d6..9e336b408ed1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3017,20 +3017,14 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) p = d_ancestor(p2, p1); if (p) { inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); - inode_lock_nested(p1->d_inode, I_MUTEX_CHILD); + inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2); return p; } p = d_ancestor(p1, p2); - if (p) { - inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); - inode_lock_nested(p2->d_inode, I_MUTEX_CHILD); - return p; - } - - lock_two_inodes(p1->d_inode, p2->d_inode, - I_MUTEX_PARENT, I_MUTEX_PARENT2); - return NULL; + inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); + inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); + return p; } /* @@ -4735,11 +4729,12 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * * a) we can get into loop creation. * b) race potential - two innocent renames can create a loop together. - * That's where 4.4 screws up. Current fix: serialization on + * That's where 4.4BSD screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. - * c) we have to lock _four_ objects - parents and victim (if it exists), - * and source. + * c) we may have to lock up to _four_ objects - parents and victim (if it exists), + * and source (if it's a non-directory or a subdirectory that moves to + * different parent). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change @@ -4771,6 +4766,7 @@ int vfs_rename(struct renamedata *rd) bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; struct name_snapshot old_name; + bool lock_old_subdir, lock_new_subdir; if (source == target) return 0; @@ -4824,15 +4820,32 @@ int vfs_rename(struct renamedata *rd) take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); /* - * Lock all moved children. Moved directories may need to change parent - * pointer so they need the lock to prevent against concurrent - * directory changes moving parent pointer. For regular files we've - * historically always done this. The lockdep locking subclasses are - * somewhat arbitrary but RENAME_EXCHANGE in particular can swap - * regular files and directories so it's difficult to tell which - * subclasses to use. + * Lock children. + * The source subdirectory needs to be locked on cross-directory + * rename or cross-directory exchange since its parent changes. + * The target subdirectory needs to be locked on cross-directory + * exchange due to parent change and on any rename due to becoming + * a victim. + * Non-directories need locking in all cases (for NFS reasons); + * they get locked after any subdirectories (in inode address order). + * + * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE. + * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex. */ - lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2); + lock_old_subdir = new_dir != old_dir; + lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE); + if (is_dir) { + if (lock_old_subdir) + inode_lock_nested(source, I_MUTEX_CHILD); + if (target && (!new_is_dir || lock_new_subdir)) + inode_lock(target); + } else if (new_is_dir) { + if (lock_new_subdir) + inode_lock_nested(target, I_MUTEX_CHILD); + inode_lock(source); + } else { + lock_two_nondirectories(source, target); + } error = -EPERM; if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target))) @@ -4880,8 +4893,9 @@ int vfs_rename(struct renamedata *rd) d_exchange(old_dentry, new_dentry); } out: - inode_unlock(source); - if (target) + if (!is_dir || lock_old_subdir) + inode_unlock(source); + if (target && (!new_is_dir || lock_new_subdir)) inode_unlock(target); dput(new_dentry); if (!error) { From 45ec1b7accd5fbc05d5faf8be8093fdc4a30a061 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 30 Oct 2023 17:14:47 -0400 Subject: [PATCH 058/187] serial: sc16is7xx: improve regmap debugfs by using one regmap per port commit 3837a0379533aabb9e4483677077479f7c6aa910 upstream. With this current driver regmap implementation, it is hard to make sense of the register addresses displayed using the regmap debugfs interface, because they do not correspond to the actual register addresses documented in the datasheet. For example, register 1 is displayed as registers 04 thru 07: $ cat /sys/kernel/debug/regmap/spi0.0/registers 04: 10 -> Port 0, register offset 1 05: 10 -> Port 1, register offset 1 06: 00 -> Port 2, register offset 1 -> invalid 07: 00 -> port 3, register offset 1 -> invalid ... The reason is that bits 0 and 1 of the register address correspond to the channel (port) bits, so the register address itself starts at bit 2, and we must 'mentally' shift each register address by 2 bits to get its real address/offset. Also, only channels 0 and 1 are supported by the chip, so channel mask combinations of 10b and 11b are invalid, and the display of these registers is useless. This patch adds a separate regmap configuration for each port, similar to what is done in the max310x driver, so that register addresses displayed match the register addresses in the chip datasheet. Also, each port now has its own debugfs entry. Example with new regmap implementation: $ cat /sys/kernel/debug/regmap/spi0.0-port0/registers 1: 10 2: 01 3: 00 ... $ cat /sys/kernel/debug/regmap/spi0.0-port1/registers 1: 10 2: 01 3: 00 As an added bonus, this also simplifies some operations (read/write/modify) because it is no longer necessary to manually shift register addresses. Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231030211447.974779-1-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 143 +++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 62 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index db33790e6675..efe53dfcac3b 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -301,8 +301,8 @@ /* Misc definitions */ +#define SC16IS7XX_SPI_READ_BIT BIT(7) #define SC16IS7XX_FIFO_SIZE (64) -#define SC16IS7XX_REG_SHIFT 2 #define SC16IS7XX_GPIOS_PER_BANK 4 struct sc16is7xx_devtype { @@ -324,6 +324,7 @@ struct sc16is7xx_one_config { struct sc16is7xx_one { struct uart_port port; u8 line; + struct regmap *regmap; struct kthread_work tx_work; struct kthread_work reg_work; struct kthread_delayed_work ms_work; @@ -362,48 +363,37 @@ static void sc16is7xx_stop_tx(struct uart_port *port); #define to_sc16is7xx_port(p,e) ((container_of((p), struct sc16is7xx_port, e))) #define to_sc16is7xx_one(p,e) ((container_of((p), struct sc16is7xx_one, e))) -static int sc16is7xx_line(struct uart_port *port) -{ - struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - - return one->line; -} - static u8 sc16is7xx_port_read(struct uart_port *port, u8 reg) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int val = 0; - const u8 line = sc16is7xx_line(port); - regmap_read(s->regmap, (reg << SC16IS7XX_REG_SHIFT) | line, &val); + regmap_read(one->regmap, reg, &val); return val; } static void sc16is7xx_port_write(struct uart_port *port, u8 reg, u8 val) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - regmap_write(s->regmap, (reg << SC16IS7XX_REG_SHIFT) | line, val); + regmap_write(one->regmap, reg, val); } static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen) { struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); - u8 addr = (SC16IS7XX_RHR_REG << SC16IS7XX_REG_SHIFT) | line; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - regcache_cache_bypass(s->regmap, true); - regmap_raw_read(s->regmap, addr, s->buf, rxlen); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, true); + regmap_raw_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen); + regcache_cache_bypass(one->regmap, false); } static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) { struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); - u8 addr = (SC16IS7XX_THR_REG << SC16IS7XX_REG_SHIFT) | line; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); /* * Don't send zero-length data, at least on SPI it confuses the chip @@ -412,19 +402,17 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) if (unlikely(!to_send)) return; - regcache_cache_bypass(s->regmap, true); - regmap_raw_write(s->regmap, addr, s->buf, to_send); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, true); + regmap_raw_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send); + regcache_cache_bypass(one->regmap, false); } static void sc16is7xx_port_update(struct uart_port *port, u8 reg, u8 mask, u8 val) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); - const u8 line = sc16is7xx_line(port); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - regmap_update_bits(s->regmap, (reg << SC16IS7XX_REG_SHIFT) | line, - mask, val); + regmap_update_bits(one->regmap, reg, mask, val); } static int sc16is7xx_alloc_line(void) @@ -479,7 +467,7 @@ static const struct sc16is7xx_devtype sc16is762_devtype = { static bool sc16is7xx_regmap_volatile(struct device *dev, unsigned int reg) { - switch (reg >> SC16IS7XX_REG_SHIFT) { + switch (reg) { case SC16IS7XX_RHR_REG: case SC16IS7XX_IIR_REG: case SC16IS7XX_LSR_REG: @@ -498,7 +486,7 @@ static bool sc16is7xx_regmap_volatile(struct device *dev, unsigned int reg) static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg) { - switch (reg >> SC16IS7XX_REG_SHIFT) { + switch (reg) { case SC16IS7XX_RHR_REG: return true; default: @@ -511,6 +499,7 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg) static int sc16is7xx_set_baud(struct uart_port *port, int baud) { struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); u8 lcr; u8 prescaler = 0; unsigned long clk = port->uartclk, div = clk / 16 / baud; @@ -542,12 +531,12 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_LCR_CONF_MODE_B); /* Enable enhanced features */ - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, SC16IS7XX_EFR_ENABLE_BIT, SC16IS7XX_EFR_ENABLE_BIT); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Put LCR back to the normal mode */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); @@ -563,10 +552,10 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_LCR_CONF_MODE_A); /* Write the new divisor */ - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); sc16is7xx_port_write(port, SC16IS7XX_DLH_REG, div / 256); sc16is7xx_port_write(port, SC16IS7XX_DLL_REG, div % 256); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Put LCR back to the normal mode */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); @@ -1094,7 +1083,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, SC16IS7XX_LCR_CONF_MODE_B); /* Configure flow control */ - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); sc16is7xx_port_write(port, SC16IS7XX_XON1_REG, termios->c_cc[VSTART]); sc16is7xx_port_write(port, SC16IS7XX_XOFF1_REG, termios->c_cc[VSTOP]); @@ -1113,7 +1102,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, SC16IS7XX_EFR_REG, SC16IS7XX_EFR_FLOWCTRL_BITS, flow); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Update LCR register */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); @@ -1164,7 +1153,6 @@ static int sc16is7xx_config_rs485(struct uart_port *port, struct ktermios *termi static int sc16is7xx_startup(struct uart_port *port) { struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); unsigned int val; unsigned long flags; @@ -1181,7 +1169,7 @@ static int sc16is7xx_startup(struct uart_port *port) sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(one->regmap, true); /* Enable write access to enhanced features and internal clock div */ sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, @@ -1199,7 +1187,7 @@ static int sc16is7xx_startup(struct uart_port *port) SC16IS7XX_TCR_RX_RESUME(24) | SC16IS7XX_TCR_RX_HALT(48)); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(one->regmap, false); /* Now, initialize the UART */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_WORD_LEN_8); @@ -1457,7 +1445,7 @@ static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s) if (s->mctrl_mask) regmap_update_bits( s->regmap, - SC16IS7XX_IOCONTROL_REG << SC16IS7XX_REG_SHIFT, + SC16IS7XX_IOCONTROL_REG, SC16IS7XX_IOCONTROL_MODEM_A_BIT | SC16IS7XX_IOCONTROL_MODEM_B_BIT, s->mctrl_mask); @@ -1472,7 +1460,7 @@ static const struct serial_rs485 sc16is7xx_rs485_supported = { static int sc16is7xx_probe(struct device *dev, const struct sc16is7xx_devtype *devtype, - struct regmap *regmap, int irq) + struct regmap *regmaps[], int irq) { unsigned long freq = 0, *pfreq = dev_get_platdata(dev); unsigned int val; @@ -1480,16 +1468,16 @@ static int sc16is7xx_probe(struct device *dev, int i, ret; struct sc16is7xx_port *s; - if (IS_ERR(regmap)) - return PTR_ERR(regmap); + for (i = 0; i < devtype->nr_uart; i++) + if (IS_ERR(regmaps[i])) + return PTR_ERR(regmaps[i]); /* * This device does not have an identification register that would * tell us if we are really connected to the correct device. * The best we can do is to check if communication is at all possible. */ - ret = regmap_read(regmap, - SC16IS7XX_LSR_REG << SC16IS7XX_REG_SHIFT, &val); + ret = regmap_read(regmaps[0], SC16IS7XX_LSR_REG, &val); if (ret < 0) return -EPROBE_DEFER; @@ -1523,7 +1511,7 @@ static int sc16is7xx_probe(struct device *dev, return -EINVAL; } - s->regmap = regmap; + s->regmap = regmaps[0]; s->devtype = devtype; dev_set_drvdata(dev, s); mutex_init(&s->efr_lock); @@ -1538,8 +1526,8 @@ static int sc16is7xx_probe(struct device *dev, sched_set_fifo(s->kworker_task); /* reset device, purging any pending irq / data */ - regmap_write(s->regmap, SC16IS7XX_IOCONTROL_REG << SC16IS7XX_REG_SHIFT, - SC16IS7XX_IOCONTROL_SRESET_BIT); + regmap_write(s->regmap, SC16IS7XX_IOCONTROL_REG, + SC16IS7XX_IOCONTROL_SRESET_BIT); for (i = 0; i < devtype->nr_uart; ++i) { s->p[i].line = i; @@ -1563,6 +1551,7 @@ static int sc16is7xx_probe(struct device *dev, s->p[i].port.ops = &sc16is7xx_ops; s->p[i].old_mctrl = 0; s->p[i].port.line = sc16is7xx_alloc_line(); + s->p[i].regmap = regmaps[i]; if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) { ret = -ENOMEM; @@ -1591,13 +1580,13 @@ static int sc16is7xx_probe(struct device *dev, sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); - regcache_cache_bypass(s->regmap, true); + regcache_cache_bypass(regmaps[i], true); /* Enable write access to enhanced features */ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_EFR_REG, SC16IS7XX_EFR_ENABLE_BIT); - regcache_cache_bypass(s->regmap, false); + regcache_cache_bypass(regmaps[i], false); /* Restore access to general registers */ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG, 0x00); @@ -1700,19 +1689,36 @@ static const struct of_device_id __maybe_unused sc16is7xx_dt_ids[] = { MODULE_DEVICE_TABLE(of, sc16is7xx_dt_ids); static struct regmap_config regcfg = { - .reg_bits = 7, - .pad_bits = 1, + .reg_bits = 5, + .pad_bits = 3, .val_bits = 8, .cache_type = REGCACHE_RBTREE, .volatile_reg = sc16is7xx_regmap_volatile, .precious_reg = sc16is7xx_regmap_precious, + .max_register = SC16IS7XX_EFCR_REG, }; +static const char *sc16is7xx_regmap_name(unsigned int port_id) +{ + static char buf[6]; + + snprintf(buf, sizeof(buf), "port%d", port_id); + + return buf; +} + +static unsigned int sc16is7xx_regmap_port_mask(unsigned int port_id) +{ + /* CH1,CH0 are at bits 2:1. */ + return port_id << 1; +} + #ifdef CONFIG_SERIAL_SC16IS7XX_SPI static int sc16is7xx_spi_probe(struct spi_device *spi) { const struct sc16is7xx_devtype *devtype; - struct regmap *regmap; + struct regmap *regmaps[2]; + unsigned int i; int ret; /* Setup SPI bus */ @@ -1737,11 +1743,20 @@ static int sc16is7xx_spi_probe(struct spi_device *spi) devtype = (struct sc16is7xx_devtype *)id_entry->driver_data; } - regcfg.max_register = (0xf << SC16IS7XX_REG_SHIFT) | - (devtype->nr_uart - 1); - regmap = devm_regmap_init_spi(spi, ®cfg); + for (i = 0; i < devtype->nr_uart; i++) { + regcfg.name = sc16is7xx_regmap_name(i); + /* + * If read_flag_mask is 0, the regmap code sets it to a default + * of 0x80. Since we specify our own mask, we must add the READ + * bit ourselves: + */ + regcfg.read_flag_mask = sc16is7xx_regmap_port_mask(i) | + SC16IS7XX_SPI_READ_BIT; + regcfg.write_flag_mask = sc16is7xx_regmap_port_mask(i); + regmaps[i] = devm_regmap_init_spi(spi, ®cfg); + } - return sc16is7xx_probe(&spi->dev, devtype, regmap, spi->irq); + return sc16is7xx_probe(&spi->dev, devtype, regmaps, spi->irq); } static void sc16is7xx_spi_remove(struct spi_device *spi) @@ -1780,7 +1795,8 @@ static int sc16is7xx_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { const struct sc16is7xx_devtype *devtype; - struct regmap *regmap; + struct regmap *regmaps[2]; + unsigned int i; if (i2c->dev.of_node) { devtype = device_get_match_data(&i2c->dev); @@ -1790,11 +1806,14 @@ static int sc16is7xx_i2c_probe(struct i2c_client *i2c, devtype = (struct sc16is7xx_devtype *)id->driver_data; } - regcfg.max_register = (0xf << SC16IS7XX_REG_SHIFT) | - (devtype->nr_uart - 1); - regmap = devm_regmap_init_i2c(i2c, ®cfg); + for (i = 0; i < devtype->nr_uart; i++) { + regcfg.name = sc16is7xx_regmap_name(i); + regcfg.read_flag_mask = sc16is7xx_regmap_port_mask(i); + regcfg.write_flag_mask = sc16is7xx_regmap_port_mask(i); + regmaps[i] = devm_regmap_init_i2c(i2c, ®cfg); + } - return sc16is7xx_probe(&i2c->dev, devtype, regmap, i2c->irq); + return sc16is7xx_probe(&i2c->dev, devtype, regmaps, i2c->irq); } static void sc16is7xx_i2c_remove(struct i2c_client *client) From 9bcb019aee475179aa439b785497dcb0392b5ef4 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 11 Dec 2023 12:13:48 -0500 Subject: [PATCH 059/187] serial: sc16is7xx: remove wasteful static buffer in sc16is7xx_regmap_name() commit 6bcab3c8acc88e265c570dea969fd04f137c8a4c upstream. Using a static buffer inside sc16is7xx_regmap_name() was a convenient and simple way to set the regmap name without having to allocate and free a buffer each time it is called. The drawback is that the static buffer wastes memory for nothing once regmap is fully initialized. Remove static buffer and use constant strings instead. This also avoids a truncation warning when using "%d" or "%u" in snprintf which was flagged by kernel test robot. Fixes: 3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port") Cc: # 6.1.x: 3837a03 serial: sc16is7xx: improve regmap debugfs by using one regmap per port Suggested-by: Andy Shevchenko Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231211171353.2901416-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index efe53dfcac3b..2c7c9cea0f3b 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1698,13 +1698,15 @@ static struct regmap_config regcfg = { .max_register = SC16IS7XX_EFCR_REG, }; -static const char *sc16is7xx_regmap_name(unsigned int port_id) +static const char *sc16is7xx_regmap_name(u8 port_id) { - static char buf[6]; - - snprintf(buf, sizeof(buf), "port%d", port_id); - - return buf; + switch (port_id) { + case 0: return "port0"; + case 1: return "port1"; + default: + WARN_ON(true); + return NULL; + } } static unsigned int sc16is7xx_regmap_port_mask(unsigned int port_id) From 6dca71e6e14addf8955dd00c7e93f8116a1e67f6 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 11 Dec 2023 12:13:49 -0500 Subject: [PATCH 060/187] serial: sc16is7xx: remove global regmap from struct sc16is7xx_port commit f6959c5217bd799bcb770b95d3c09b3244e175c6 upstream. Remove global struct regmap so that it is more obvious that this regmap is to be used only in the probe function. Also add a comment to that effect in probe function. Fixes: 3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port") Cc: Suggested-by: Andy Shevchenko Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231211171353.2901416-3-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 2c7c9cea0f3b..4a75ef37a264 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -335,7 +335,6 @@ struct sc16is7xx_one { struct sc16is7xx_port { const struct sc16is7xx_devtype *devtype; - struct regmap *regmap; struct clk *clk; #ifdef CONFIG_GPIOLIB struct gpio_chip gpio; @@ -1415,7 +1414,8 @@ static int sc16is7xx_setup_gpio_chip(struct sc16is7xx_port *s) /* * Configure ports designated to operate as modem control lines. */ -static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s) +static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s, + struct regmap *regmap) { int i; int ret; @@ -1444,7 +1444,7 @@ static int sc16is7xx_setup_mctrl_ports(struct sc16is7xx_port *s) if (s->mctrl_mask) regmap_update_bits( - s->regmap, + regmap, SC16IS7XX_IOCONTROL_REG, SC16IS7XX_IOCONTROL_MODEM_A_BIT | SC16IS7XX_IOCONTROL_MODEM_B_BIT, s->mctrl_mask); @@ -1476,6 +1476,10 @@ static int sc16is7xx_probe(struct device *dev, * This device does not have an identification register that would * tell us if we are really connected to the correct device. * The best we can do is to check if communication is at all possible. + * + * Note: regmap[0] is used in the probe function to access registers + * common to all channels/ports, as it is guaranteed to be present on + * all variants. */ ret = regmap_read(regmaps[0], SC16IS7XX_LSR_REG, &val); if (ret < 0) @@ -1511,7 +1515,6 @@ static int sc16is7xx_probe(struct device *dev, return -EINVAL; } - s->regmap = regmaps[0]; s->devtype = devtype; dev_set_drvdata(dev, s); mutex_init(&s->efr_lock); @@ -1526,7 +1529,7 @@ static int sc16is7xx_probe(struct device *dev, sched_set_fifo(s->kworker_task); /* reset device, purging any pending irq / data */ - regmap_write(s->regmap, SC16IS7XX_IOCONTROL_REG, + regmap_write(regmaps[0], SC16IS7XX_IOCONTROL_REG, SC16IS7XX_IOCONTROL_SRESET_BIT); for (i = 0; i < devtype->nr_uart; ++i) { @@ -1606,7 +1609,7 @@ static int sc16is7xx_probe(struct device *dev, s->p[u].irda_mode = true; } - ret = sc16is7xx_setup_mctrl_ports(s); + ret = sc16is7xx_setup_mctrl_ports(s, regmaps[0]); if (ret) goto out_ports; From f6c58552a8d9801b340211da41a94713fcb456f6 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 11 Dec 2023 12:13:50 -0500 Subject: [PATCH 061/187] serial: sc16is7xx: remove unused line structure member commit 41a308cbedb2a68a6831f0f2e992e296c4b8aff0 upstream. Now that the driver has been converted to use one regmap per port, the line structure member is no longer used, so remove it. Fixes: 3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port") Cc: Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231211171353.2901416-4-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 4a75ef37a264..067d7490bd38 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -323,7 +323,6 @@ struct sc16is7xx_one_config { struct sc16is7xx_one { struct uart_port port; - u8 line; struct regmap *regmap; struct kthread_work tx_work; struct kthread_work reg_work; @@ -1533,7 +1532,6 @@ static int sc16is7xx_probe(struct device *dev, SC16IS7XX_IOCONTROL_SRESET_BIT); for (i = 0; i < devtype->nr_uart; ++i) { - s->p[i].line = i; /* Initialize port data */ s->p[i].port.dev = dev; s->p[i].port.irq = irq; From 4b068e55bf5ea7bab4d8a282c6a24b03e80c0b68 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 11 Dec 2023 12:13:51 -0500 Subject: [PATCH 062/187] serial: sc16is7xx: change EFR lock to operate on each channels commit 4409df5866b7ff7686ba27e449ca97a92ee063c9 upstream. Now that the driver has been converted to use one regmap per port, change efr locking to operate on a channel basis instead of on the whole IC. Fixes: 3837a0379533 ("serial: sc16is7xx: improve regmap debugfs by using one regmap per port") Cc: # 6.1.x: 3837a03 serial: sc16is7xx: improve regmap debugfs by using one regmap per port Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231211171353.2901416-5-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 49 ++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 067d7490bd38..149ed55b091d 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -324,6 +324,7 @@ struct sc16is7xx_one_config { struct sc16is7xx_one { struct uart_port port; struct regmap *regmap; + struct mutex efr_lock; /* EFR registers access */ struct kthread_work tx_work; struct kthread_work reg_work; struct kthread_delayed_work ms_work; @@ -343,7 +344,6 @@ struct sc16is7xx_port { unsigned char buf[SC16IS7XX_FIFO_SIZE]; struct kthread_worker kworker; struct task_struct *kworker_task; - struct mutex efr_lock; struct sc16is7xx_one p[]; }; @@ -496,7 +496,6 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg) static int sc16is7xx_set_baud(struct uart_port *port, int baud) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); u8 lcr; u8 prescaler = 0; @@ -520,7 +519,7 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) * because the bulk of the interrupt processing is run as a workqueue * job in thread context. */ - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG); @@ -539,7 +538,7 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) /* Put LCR back to the normal mode */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_CLKSEL_BIT, @@ -709,11 +708,10 @@ static unsigned int sc16is7xx_get_hwmctrl(struct uart_port *port) static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) { struct uart_port *port = &one->port; - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); unsigned long flags; unsigned int status, changed; - lockdep_assert_held_once(&s->efr_lock); + lockdep_assert_held_once(&one->efr_lock); status = sc16is7xx_get_hwmctrl(port); changed = status ^ one->old_mctrl; @@ -739,15 +737,20 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) { + bool rc = true; struct uart_port *port = &s->p[portno].port; + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); + + mutex_lock(&one->efr_lock); do { unsigned int iir, rxlen; - struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); - if (iir & SC16IS7XX_IIR_NO_INT_BIT) - return false; + if (iir & SC16IS7XX_IIR_NO_INT_BIT) { + rc = false; + goto out_port_irq; + } iir &= SC16IS7XX_IIR_ID_MASK; @@ -787,15 +790,17 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) break; } } while (0); - return true; + +out_port_irq: + mutex_unlock(&one->efr_lock); + + return rc; } static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) { struct sc16is7xx_port *s = (struct sc16is7xx_port *)dev_id; - mutex_lock(&s->efr_lock); - while (1) { bool keep_polling = false; int i; @@ -806,24 +811,22 @@ static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) break; } - mutex_unlock(&s->efr_lock); - return IRQ_HANDLED; } static void sc16is7xx_tx_proc(struct kthread_work *ws) { struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port); - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned long flags; if ((port->rs485.flags & SER_RS485_ENABLED) && (port->rs485.delay_rts_before_send > 0)) msleep(port->rs485.delay_rts_before_send); - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); sc16is7xx_handle_tx(port); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); spin_lock_irqsave(&port->lock, flags); sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); @@ -930,9 +933,9 @@ static void sc16is7xx_ms_proc(struct kthread_work *ws) struct sc16is7xx_port *s = dev_get_drvdata(one->port.dev); if (one->port.state) { - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); sc16is7xx_update_mlines(one); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); kthread_queue_delayed_work(&s->kworker, &one->ms_work, HZ); } @@ -1016,7 +1019,6 @@ static void sc16is7xx_set_termios(struct uart_port *port, struct ktermios *termios, const struct ktermios *old) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int lcr, flow = 0; int baud; @@ -1075,7 +1077,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, port->ignore_status_mask |= SC16IS7XX_LSR_BRK_ERROR_MASK; /* As above, claim the mutex while accessing the EFR. */ - mutex_lock(&s->efr_lock); + mutex_lock(&one->efr_lock); sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); @@ -1105,7 +1107,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, /* Update LCR register */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); - mutex_unlock(&s->efr_lock); + mutex_unlock(&one->efr_lock); /* Get baud rate generator configuration */ baud = uart_get_baud_rate(port, termios, old, @@ -1516,7 +1518,6 @@ static int sc16is7xx_probe(struct device *dev, s->devtype = devtype; dev_set_drvdata(dev, s); - mutex_init(&s->efr_lock); kthread_init_worker(&s->kworker); s->kworker_task = kthread_run(kthread_worker_fn, &s->kworker, @@ -1559,6 +1560,8 @@ static int sc16is7xx_probe(struct device *dev, goto out_ports; } + mutex_init(&s->p[i].efr_lock); + ret = uart_get_rs485_mode(&s->p[i].port); if (ret) goto out_ports; From 416b10d2817c94db86829fb92ad43ce7d002c573 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 11 Dec 2023 12:13:52 -0500 Subject: [PATCH 063/187] serial: sc16is7xx: convert from _raw_ to _noinc_ regmap functions for FIFO commit dbf4ab821804df071c8b566d9813083125e6d97b upstream. The SC16IS7XX IC supports a burst mode to access the FIFOs where the initial register address is sent ($00), followed by all the FIFO data without having to resend the register address each time. In this mode, the IC doesn't increment the register address for each R/W byte. The regmap_raw_read() and regmap_raw_write() are functions which can perform IO over multiple registers. They are currently used to read/write from/to the FIFO, and although they operate correctly in this burst mode on the SPI bus, they would corrupt the regmap cache if it was not disabled manually. The reason is that when the R/W size is more than 1 byte, these functions assume that the register address is incremented and handle the cache accordingly. Convert FIFO R/W functions to use the regmap _noinc_ versions in order to remove the manual cache control which was a workaround when using the _raw_ versions. FIFO registers are properly declared as volatile so cache will not be used/updated for FIFO accesses. Fixes: dfeae619d781 ("serial: sc16is7xx") Cc: Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231211171353.2901416-6-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 149ed55b091d..bbc392cd24e4 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -383,9 +383,7 @@ static void sc16is7xx_fifo_read(struct uart_port *port, unsigned int rxlen) struct sc16is7xx_port *s = dev_get_drvdata(port->dev); struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - regcache_cache_bypass(one->regmap, true); - regmap_raw_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen); - regcache_cache_bypass(one->regmap, false); + regmap_noinc_read(one->regmap, SC16IS7XX_RHR_REG, s->buf, rxlen); } static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) @@ -400,9 +398,7 @@ static void sc16is7xx_fifo_write(struct uart_port *port, u8 to_send) if (unlikely(!to_send)) return; - regcache_cache_bypass(one->regmap, true); - regmap_raw_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send); - regcache_cache_bypass(one->regmap, false); + regmap_noinc_write(one->regmap, SC16IS7XX_THR_REG, s->buf, to_send); } static void sc16is7xx_port_update(struct uart_port *port, u8 reg, @@ -494,6 +490,11 @@ static bool sc16is7xx_regmap_precious(struct device *dev, unsigned int reg) return false; } +static bool sc16is7xx_regmap_noinc(struct device *dev, unsigned int reg) +{ + return reg == SC16IS7XX_RHR_REG; +} + static int sc16is7xx_set_baud(struct uart_port *port, int baud) { struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); @@ -1699,6 +1700,10 @@ static struct regmap_config regcfg = { .cache_type = REGCACHE_RBTREE, .volatile_reg = sc16is7xx_regmap_volatile, .precious_reg = sc16is7xx_regmap_precious, + .writeable_noinc_reg = sc16is7xx_regmap_noinc, + .readable_noinc_reg = sc16is7xx_regmap_noinc, + .max_raw_read = SC16IS7XX_FIFO_SIZE, + .max_raw_write = SC16IS7XX_FIFO_SIZE, .max_register = SC16IS7XX_EFCR_REG, }; From de8e41f78f76c7fb4d20f69e687b38a5122dff2a Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 21 Dec 2023 18:18:08 -0500 Subject: [PATCH 064/187] serial: sc16is7xx: fix invalid sc16is7xx_lines bitfield in case of probe error commit 8a1060ce974919f2a79807527ad82ac39336eda2 upstream. If an error occurs during probing, the sc16is7xx_lines bitfield may be left in a state that doesn't represent the correct state of lines allocation. For example, in a system with two SC16 devices, if an error occurs only during probing of channel (port) B of the second device, sc16is7xx_lines final state will be 00001011b instead of the expected 00000011b. This is caused in part because of the "i--" in the for/loop located in the out_ports: error path. Fix this by checking the return value of uart_add_one_port() and set line allocation bit only if this was successful. This allows the refactor of the obfuscated for(i--...) loop in the error path, and properly call uart_remove_one_port() only when needed, and properly unset line allocation bits. Also use same mechanism in remove() when calling uart_remove_one_port(). Fixes: c64349722d14 ("sc16is7xx: support multiple devices") Cc: Cc: Yury Norov Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231221231823.2327894-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 44 ++++++++++++++-------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index bbc392cd24e4..b31467cf2b70 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -409,19 +409,6 @@ static void sc16is7xx_port_update(struct uart_port *port, u8 reg, regmap_update_bits(one->regmap, reg, mask, val); } -static int sc16is7xx_alloc_line(void) -{ - int i; - - BUILD_BUG_ON(SC16IS7XX_MAX_DEVS > BITS_PER_LONG); - - for (i = 0; i < SC16IS7XX_MAX_DEVS; i++) - if (!test_and_set_bit(i, &sc16is7xx_lines)) - break; - - return i; -} - static void sc16is7xx_power(struct uart_port *port, int on) { sc16is7xx_port_update(port, SC16IS7XX_IER_REG, @@ -1534,6 +1521,13 @@ static int sc16is7xx_probe(struct device *dev, SC16IS7XX_IOCONTROL_SRESET_BIT); for (i = 0; i < devtype->nr_uart; ++i) { + s->p[i].port.line = find_first_zero_bit(&sc16is7xx_lines, + SC16IS7XX_MAX_DEVS); + if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) { + ret = -ERANGE; + goto out_ports; + } + /* Initialize port data */ s->p[i].port.dev = dev; s->p[i].port.irq = irq; @@ -1553,14 +1547,8 @@ static int sc16is7xx_probe(struct device *dev, s->p[i].port.rs485_supported = sc16is7xx_rs485_supported; s->p[i].port.ops = &sc16is7xx_ops; s->p[i].old_mctrl = 0; - s->p[i].port.line = sc16is7xx_alloc_line(); s->p[i].regmap = regmaps[i]; - if (s->p[i].port.line >= SC16IS7XX_MAX_DEVS) { - ret = -ENOMEM; - goto out_ports; - } - mutex_init(&s->p[i].efr_lock); ret = uart_get_rs485_mode(&s->p[i].port); @@ -1578,8 +1566,13 @@ static int sc16is7xx_probe(struct device *dev, kthread_init_work(&s->p[i].tx_work, sc16is7xx_tx_proc); kthread_init_work(&s->p[i].reg_work, sc16is7xx_reg_proc); kthread_init_delayed_work(&s->p[i].ms_work, sc16is7xx_ms_proc); + /* Register port */ - uart_add_one_port(&sc16is7xx_uart, &s->p[i].port); + ret = uart_add_one_port(&sc16is7xx_uart, &s->p[i].port); + if (ret) + goto out_ports; + + set_bit(s->p[i].port.line, &sc16is7xx_lines); /* Enable EFR */ sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG, @@ -1646,10 +1639,9 @@ static int sc16is7xx_probe(struct device *dev, #endif out_ports: - for (i--; i >= 0; i--) { - uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); - clear_bit(s->p[i].port.line, &sc16is7xx_lines); - } + for (i = 0; i < devtype->nr_uart; i++) + if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines)) + uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); kthread_stop(s->kworker_task); @@ -1671,8 +1663,8 @@ static void sc16is7xx_remove(struct device *dev) for (i = 0; i < s->devtype->nr_uart; i++) { kthread_cancel_delayed_work_sync(&s->p[i].ms_work); - uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); - clear_bit(s->p[i].port.line, &sc16is7xx_lines); + if (test_and_clear_bit(s->p[i].port.line, &sc16is7xx_lines)) + uart_remove_one_port(&sc16is7xx_uart, &s->p[i].port); sc16is7xx_power(&s->p[i].port, 0); } From 80beb4424d48701f0e4cf924fb5263ce98a796f8 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 21 Dec 2023 18:18:11 -0500 Subject: [PATCH 065/187] serial: sc16is7xx: remove obsolete loop in sc16is7xx_port_irq() commit ed647256e8f226241ecff7baaecdb8632ffc7ec1 upstream. Commit 834449872105 ("sc16is7xx: Fix for multi-channel stall") changed sc16is7xx_port_irq() from looping multiple times when there was still interrupts to serve. It simply changed the do {} while(1) loop to a do {} while(0) loop, which makes the loop itself now obsolete. Clean the code by removing this obsolete do {} while(0) loop. Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall") Cc: Suggested-by: Andy Shevchenko Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231221231823.2327894-5-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 81 ++++++++++++++++------------------ 1 file changed, 39 insertions(+), 42 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index b31467cf2b70..6bce1a6c0496 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -726,58 +726,55 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) { bool rc = true; + unsigned int iir, rxlen; struct uart_port *port = &s->p[portno].port; struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); mutex_lock(&one->efr_lock); - do { - unsigned int iir, rxlen; + iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); + if (iir & SC16IS7XX_IIR_NO_INT_BIT) { + rc = false; + goto out_port_irq; + } - iir = sc16is7xx_port_read(port, SC16IS7XX_IIR_REG); - if (iir & SC16IS7XX_IIR_NO_INT_BIT) { - rc = false; - goto out_port_irq; - } + iir &= SC16IS7XX_IIR_ID_MASK; - iir &= SC16IS7XX_IIR_ID_MASK; + switch (iir) { + case SC16IS7XX_IIR_RDI_SRC: + case SC16IS7XX_IIR_RLSE_SRC: + case SC16IS7XX_IIR_RTOI_SRC: + case SC16IS7XX_IIR_XOFFI_SRC: + rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG); - switch (iir) { - case SC16IS7XX_IIR_RDI_SRC: - case SC16IS7XX_IIR_RLSE_SRC: - case SC16IS7XX_IIR_RTOI_SRC: - case SC16IS7XX_IIR_XOFFI_SRC: - rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG); + /* + * There is a silicon bug that makes the chip report a + * time-out interrupt but no data in the FIFO. This is + * described in errata section 18.1.4. + * + * When this happens, read one byte from the FIFO to + * clear the interrupt. + */ + if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen) + rxlen = 1; - /* - * There is a silicon bug that makes the chip report a - * time-out interrupt but no data in the FIFO. This is - * described in errata section 18.1.4. - * - * When this happens, read one byte from the FIFO to - * clear the interrupt. - */ - if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen) - rxlen = 1; - - if (rxlen) - sc16is7xx_handle_rx(port, rxlen, iir); - break; + if (rxlen) + sc16is7xx_handle_rx(port, rxlen, iir); + break; /* CTSRTS interrupt comes only when CTS goes inactive */ - case SC16IS7XX_IIR_CTSRTS_SRC: - case SC16IS7XX_IIR_MSI_SRC: - sc16is7xx_update_mlines(one); - break; - case SC16IS7XX_IIR_THRI_SRC: - sc16is7xx_handle_tx(port); - break; - default: - dev_err_ratelimited(port->dev, - "ttySC%i: Unexpected interrupt: %x", - port->line, iir); - break; - } - } while (0); + case SC16IS7XX_IIR_CTSRTS_SRC: + case SC16IS7XX_IIR_MSI_SRC: + sc16is7xx_update_mlines(one); + break; + case SC16IS7XX_IIR_THRI_SRC: + sc16is7xx_handle_tx(port); + break; + default: + dev_err_ratelimited(port->dev, + "ttySC%i: Unexpected interrupt: %x", + port->line, iir); + break; + } out_port_irq: mutex_unlock(&one->efr_lock); From 0e0653d53af88120893a58164dff4dbb92c0b0b7 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 21 Dec 2023 18:18:12 -0500 Subject: [PATCH 066/187] serial: sc16is7xx: improve do/while loop in sc16is7xx_irq() commit d5078509c8b06c5c472a60232815e41af81c6446 upstream. Simplify and improve readability by replacing while(1) loop with do {} while, and by using the keep_polling variable as the exit condition, making it more explicit. Fixes: 834449872105 ("sc16is7xx: Fix for multi-channel stall") Cc: Suggested-by: Andy Shevchenko Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231221231823.2327894-6-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 6bce1a6c0496..ab9159441d02 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -784,17 +784,18 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) { + bool keep_polling; + struct sc16is7xx_port *s = (struct sc16is7xx_port *)dev_id; - while (1) { - bool keep_polling = false; + do { int i; + keep_polling = false; + for (i = 0; i < s->devtype->nr_uart; ++i) keep_polling |= sc16is7xx_port_irq(s, i); - if (!keep_polling) - break; - } + } while (keep_polling); return IRQ_HANDLED; } From 1d15da560175f45563c97533986f0a597f114080 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 8 Nov 2023 14:12:15 +0800 Subject: [PATCH 067/187] LoongArch/smp: Call rcutree_report_cpu_starting() earlier commit a2ccf46333d7b2cf9658f0d82ac74097c1542fae upstream. rcutree_report_cpu_starting() must be called before cpu_probe() to avoid the following lockdep splat that triggered by calling __alloc_pages() when CONFIG_PROVE_RCU_LIST=y: ============================= WARNING: suspicious RCU usage 6.6.0+ #980 Not tainted ----------------------------- kernel/locking/lockdep.c:3761 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 1 lock held by swapper/1/0: #0: 900000000c82ef98 (&pcp->lock){+.+.}-{2:2}, at: get_page_from_freelist+0x894/0x1790 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.0+ #980 Stack : 0000000000000001 9000000004f79508 9000000004893670 9000000100310000 90000001003137d0 0000000000000000 90000001003137d8 9000000004f79508 0000000000000000 0000000000000001 0000000000000000 90000000048a3384 203a656d616e2065 ca43677b3687e616 90000001002c3480 0000000000000008 000000000000009d 0000000000000000 0000000000000001 80000000ffffe0b8 000000000000000d 0000000000000033 0000000007ec0000 13bbf50562dad831 9000000005140748 0000000000000000 9000000004f79508 0000000000000004 0000000000000000 9000000005140748 90000001002bad40 0000000000000000 90000001002ba400 0000000000000000 9000000003573ec8 0000000000000000 00000000000000b0 0000000000000004 0000000000000000 0000000000070000 ... Call Trace: [<9000000003573ec8>] show_stack+0x38/0x150 [<9000000004893670>] dump_stack_lvl+0x74/0xa8 [<900000000360d2bc>] lockdep_rcu_suspicious+0x14c/0x190 [<900000000361235c>] __lock_acquire+0xd0c/0x2740 [<90000000036146f4>] lock_acquire+0x104/0x2c0 [<90000000048a955c>] _raw_spin_lock_irqsave+0x5c/0x90 [<900000000381cd5c>] rmqueue_bulk+0x6c/0x950 [<900000000381fc0c>] get_page_from_freelist+0xd4c/0x1790 [<9000000003821c6c>] __alloc_pages+0x1bc/0x3e0 [<9000000003583b40>] tlb_init+0x150/0x2a0 [<90000000035742a0>] per_cpu_trap_init+0xf0/0x110 [<90000000035712fc>] cpu_probe+0x3dc/0x7a0 [<900000000357ed20>] start_secondary+0x40/0xb0 [<9000000004897138>] smpboot_entry+0x54/0x58 raw_smp_processor_id() is required in order to avoid calling into lockdep before RCU has declared the CPU to be watched for readers. See also commit 29368e093921 ("x86/smpboot: Move rcu_cpu_starting() earlier"), commit de5d9dae150c ("s390/smp: move rcu_cpu_starting() earlier") and commit 99f070b62322 ("powerpc/smp: Call rcu_cpu_starting() earlier"). Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 14508d429ffa..434bfc1cd31a 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -471,8 +471,9 @@ asmlinkage void start_secondary(void) unsigned int cpu; sync_counter(); - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); + rcu_cpu_starting(cpu); cpu_probe(); constant_clockevent_init(); From 3101b9fd749f87637238f87007e20daaf83fdb41 Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Fri, 24 Nov 2023 16:27:25 +0530 Subject: [PATCH 068/187] mm: page_alloc: unreserve highatomic page blocks before oom commit ac3f3b0a55518056bc80ed32a41931c99e1f7d81 upstream. __alloc_pages_direct_reclaim() is called from slowpath allocation where high atomic reserves can be unreserved after there is a progress in reclaim and yet no suitable page is found. Later should_reclaim_retry() gets called from slow path allocation to decide if the reclaim needs to be retried before OOM kill path is taken. should_reclaim_retry() checks the available(reclaimable + free pages) memory against the min wmark levels of a zone and returns: a) true, if it is above the min wmark so that slow path allocation will do the reclaim retries. b) false, thus slowpath allocation takes oom kill path. should_reclaim_retry() can also unreserves the high atomic reserves **but only after all the reclaim retries are exhausted.** In a case where there are almost none reclaimable memory and free pages contains mostly the high atomic reserves but allocation context can't use these high atomic reserves, makes the available memory below min wmark levels hence false is returned from should_reclaim_retry() leading the allocation request to take OOM kill path. This can turn into a early oom kill if high atomic reserves are holding lot of free memory and unreserving of them is not attempted. (early)OOM is encountered on a VM with the below state: [ 295.998653] Normal free:7728kB boost:0kB min:804kB low:1004kB high:1204kB reserved_highatomic:8192KB active_anon:4kB inactive_anon:0kB active_file:24kB inactive_file:24kB unevictable:1220kB writepending:0kB present:70732kB managed:49224kB mlocked:0kB bounce:0kB free_pcp:688kB local_pcp:492kB free_cma:0kB [ 295.998656] lowmem_reserve[]: 0 32 [ 295.998659] Normal: 508*4kB (UMEH) 241*8kB (UMEH) 143*16kB (UMEH) 33*32kB (UH) 7*64kB (UH) 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 7752kB Per above log, the free memory of ~7MB exist in the high atomic reserves is not freed up before falling back to oom kill path. Fix it by trying to unreserve the high atomic reserves in should_reclaim_retry() before __alloc_pages_direct_reclaim() can fallback to oom kill path. Link: https://lkml.kernel.org/r/1700823445-27531-1-git-send-email-quic_charante@quicinc.com Fixes: 0aaa29a56e4f ("mm, page_alloc: reserve pageblocks for high-order atomic allocations on demand") Signed-off-by: Charan Teja Kalla Reported-by: Chris Goldsworthy Suggested-by: Michal Hocko Acked-by: Michal Hocko Acked-by: David Rientjes Cc: Chris Goldsworthy Cc: David Hildenbrand Cc: Johannes Weiner Cc: Mel Gorman Cc: Pavankumar Kondeti Cc: Vlastimil Babka Cc: Joakim Tjernlund Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4583f8a42d91..c783806eefc9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4921,14 +4921,9 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, else (*no_progress_loops)++; - /* - * Make sure we converge to OOM if we cannot make any progress - * several times in the row. - */ - if (*no_progress_loops > MAX_RECLAIM_RETRIES) { - /* Before OOM, exhaust highatomic_reserve */ - return unreserve_highatomic_pageblock(ac, true); - } + if (*no_progress_loops > MAX_RECLAIM_RETRIES) + goto out; + /* * Keep reclaiming pages while there is a chance this will lead @@ -4971,6 +4966,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, schedule_timeout_uninterruptible(1); else cond_resched(); +out: + /* Before OOM, exhaust highatomic_reserve */ + if (!ret) + return unreserve_highatomic_pageblock(ac, true); + return ret; } From 8fa25e67fdcf77b4dd29a08d2638591a66c05fcc Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Jan 2024 20:38:50 +0900 Subject: [PATCH 069/187] ksmbd: set v2 lease version on lease upgrade [ Upstream commit bb05367a66a9990d2c561282f5620bb1dbe40c28 ] If file opened with v2 lease is upgraded with v1 lease, smb server should response v2 lease create context to client. This patch fix smb2.lease.v2_epoch2 test failure. This test case assumes the following scenario: 1. smb2 create with v2 lease(R, LEASE1 key) 2. smb server return smb2 create response with v2 lease context(R, LEASE1 key, epoch + 1) 3. smb2 create with v1 lease(RH, LEASE1 key) 4. smb server return smb2 create response with v2 lease context(RH, LEASE1 key, epoch + 2) i.e. If same client(same lease key) try to open a file that is being opened with v2 lease with v1 lease, smb server should return v2 lease. Signed-off-by: Namjae Jeon Acked-by: Tom Talpey Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/oplock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index f1831e26adad..8adae5871e44 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1036,6 +1036,7 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) lease2->duration = lease1->duration; lease2->flags = lease1->flags; lease2->epoch = lease1->epoch++; + lease2->version = lease1->version; } static int add_lease_global_list(struct oplock_info *opinfo) From e61fc656ceeaec65f19a92f0ffbeb562b7941e8d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Jan 2024 20:38:51 +0900 Subject: [PATCH 070/187] ksmbd: fix potential circular locking issue in smb2_set_ea() [ Upstream commit 6fc0a265e1b932e5e97a038f99e29400a93baad0 ] smb2_set_ea() can be called in parent inode lock range. So add get_write argument to smb2_set_ea() not to call nested mnt_want_write(). Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 6826b562073e..6e2f8a2fcd72 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2321,11 +2321,12 @@ static noinline int create_smb2_pipe(struct ksmbd_work *work) * @eabuf: set info command buffer * @buf_len: set info command buffer length * @path: dentry path for get ea + * @get_write: get write access to a mount * * Return: 0 on success, otherwise error */ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, - const struct path *path) + const struct path *path, bool get_write) { struct user_namespace *user_ns = mnt_user_ns(path->mnt); char *attr_name = NULL, *value; @@ -3013,7 +3014,7 @@ int smb2_open(struct ksmbd_work *work) rc = smb2_set_ea(&ea_buf->ea, le32_to_cpu(ea_buf->ccontext.DataLength), - &path); + &path, false); if (rc == -EOPNOTSUPP) rc = 0; else if (rc) @@ -5990,7 +5991,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp, return -EINVAL; return smb2_set_ea((struct smb2_ea_info *)req->Buffer, - buf_len, &fp->filp->f_path); + buf_len, &fp->filp->f_path, true); } case FILE_POSITION_INFORMATION: { From de603a52af84194a534e20ee8518eea31919f3cc Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Jan 2024 20:38:52 +0900 Subject: [PATCH 071/187] ksmbd: don't increment epoch if current state and request state are same [ Upstream commit b6e9a44e99603fe10e1d78901fdd97681a539612 ] If existing lease state and request state are same, don't increment epoch in create context. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/oplock.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 8adae5871e44..e577d4f97f10 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -105,7 +105,7 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) lease->is_dir = lctx->is_dir; memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE); lease->version = lctx->version; - lease->epoch = le16_to_cpu(lctx->epoch); + lease->epoch = le16_to_cpu(lctx->epoch) + 1; INIT_LIST_HEAD(&opinfo->lease_entry); opinfo->o_lease = lease; @@ -541,6 +541,9 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, continue; } + if (lctx->req_state != lease->state) + lease->epoch++; + /* upgrading lease */ if ((atomic_read(&ci->op_count) + atomic_read(&ci->sop_count)) == 1) { @@ -1035,7 +1038,7 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) SMB2_LEASE_KEY_SIZE); lease2->duration = lease1->duration; lease2->flags = lease1->flags; - lease2->epoch = lease1->epoch++; + lease2->epoch = lease1->epoch; lease2->version = lease1->version; } @@ -1454,7 +1457,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) memcpy(buf->lcontext.LeaseKey, lease->lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseFlags = lease->flags; - buf->lcontext.Epoch = cpu_to_le16(++lease->epoch); + buf->lcontext.Epoch = cpu_to_le16(lease->epoch); buf->lcontext.LeaseState = lease->state; memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, SMB2_LEASE_KEY_SIZE); From 844dfef31659f8f50db36e6a038a550220e34954 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Jan 2024 20:38:53 +0900 Subject: [PATCH 072/187] ksmbd: send lease break notification on FILE_RENAME_INFORMATION [ Upstream commit 3fc74c65b367476874da5fe6f633398674b78e5a ] Send lease break notification on FILE_RENAME_INFORMATION request. This patch fix smb2.lease.v2_epoch2 test failure. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/oplock.c | 12 +++++++----- fs/smb/server/smb2pdu.c | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index e577d4f97f10..1253e9bde34c 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -541,14 +541,12 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, continue; } - if (lctx->req_state != lease->state) - lease->epoch++; - /* upgrading lease */ if ((atomic_read(&ci->op_count) + atomic_read(&ci->sop_count)) == 1) { if (lease->state != SMB2_LEASE_NONE_LE && lease->state == (lctx->req_state & lease->state)) { + lease->epoch++; lease->state |= lctx->req_state; if (lctx->req_state & SMB2_LEASE_WRITE_CACHING_LE) @@ -559,13 +557,17 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, atomic_read(&ci->sop_count)) > 1) { if (lctx->req_state == (SMB2_LEASE_READ_CACHING_LE | - SMB2_LEASE_HANDLE_CACHING_LE)) + SMB2_LEASE_HANDLE_CACHING_LE)) { + lease->epoch++; lease->state = lctx->req_state; + } } if (lctx->req_state && lease->state == - SMB2_LEASE_NONE_LE) + SMB2_LEASE_NONE_LE) { + lease->epoch++; lease_none_upgrade(opinfo, lctx->req_state); + } } read_lock(&ci->m_lock); } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 6e2f8a2fcd72..4cfa45c2727e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5579,6 +5579,7 @@ static int smb2_rename(struct ksmbd_work *work, if (!file_info->ReplaceIfExists) flags = RENAME_NOREPLACE; + smb_break_all_levII_oplock(work, fp, 0); rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); out: kfree(new_name); From b1c06ee2d1711eda0864b8cc296f356d88c1b5e1 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Jan 2024 20:38:54 +0900 Subject: [PATCH 073/187] ksmbd: Add missing set_freezable() for freezable kthread From: Kevin Hao [ Upstream commit 8fb7b723924cc9306bc161f45496497aec733904 ] The kernel thread function ksmbd_conn_handler_loop() invokes the try_to_freeze() in its loop. But all the kernel threads are non-freezable by default. So if we want to make a kernel thread to be freezable, we have to invoke set_freezable() explicitly. Signed-off-by: Kevin Hao Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/connection.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 7977827c6541..09e1e7771592 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -284,6 +284,7 @@ int ksmbd_conn_handler_loop(void *p) goto out; conn->last_active = jiffies; + set_freezable(); while (ksmbd_conn_alive(conn)) { if (try_to_freeze()) continue; From a5046e5eb855d973ed77014897f69f2f7ee331a7 Mon Sep 17 00:00:00 2001 From: Jonathan Gray Date: Sat, 27 Jan 2024 12:00:07 +1100 Subject: [PATCH 074/187] Revert "drm/amd: Enable PCIe PME from D3" This reverts commit 0c8d252d0a20a412ec30859afef6393aecfdd3cd. duplicated a change made in 6.1.66 c6088429630048661e480ed28590e69a48c102d6 Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Jonathan Gray Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index a8e1f2cfe12d..b9983ca99eb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2202,8 +2202,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_wake_from_d3(pdev, TRUE); - pci_wake_from_d3(pdev, TRUE); - /* * For runpm implemented via BACO, PMFW will handle the * timing for BACO in and out: From b59e08c872e0995328bbcd765f07f747d98b3ba4 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Mon, 4 Dec 2023 10:09:33 +0800 Subject: [PATCH 075/187] drm/amd/display: pbn_div need be updated for hotplug event commit 9cdef4f720376ef0fb0febce1ed2377c19e531f9 upstream. link_rate sometime will be changed when DP MST connector hotplug, so pbn_div also need be updated; otherwise, it will mismatch with link_rate, causes no output in external monitor. This is a backport to 6.7 and older. Cc: stable@vger.kernel.org Tested-by: Daniel Wheeler Reviewed-by: Jerry Zuo Acked-by: Rodrigo Siqueira Signed-off-by: Wade Wang Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 13e0b521e3db..f02e509d5fac 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6677,8 +6677,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - if (!mst_state->pbn_div) - mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_port->dc_link); + mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_port->dc_link); if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; From 49aaeb8c539b1633b3bd7c2df131ec578aa1eae1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jan 2024 18:17:44 +0200 Subject: [PATCH 076/187] wifi: mac80211: fix potential sta-link leak [ Upstream commit b01a74b3ca6fd51b62c67733ba7c3280fa6c5d26 ] When a station is allocated, links are added but not set to valid yet (e.g. during connection to an AP MLD), we might remove the station without ever marking links valid, and leak them. Fix that. Fixes: cb71f1d136a6 ("wifi: mac80211: add sta link addition/removal") Signed-off-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111181514.6573998beaf8.I09ac2e1d41c80f82a5a616b8bd1d9d8dd709a6a6@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/sta_info.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 49b71453dec3..f3d6c3e4c970 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -396,7 +396,10 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { - if (!(sta->sta.valid_links & BIT(i))) + struct link_sta_info *link_sta; + + link_sta = rcu_access_pointer(sta->link[i]); + if (!link_sta) continue; sta_remove_link(sta, i, false); From 6994dba06321e3c48fdad0ba796a063d9d82183a Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 18 Jan 2024 12:32:10 +0800 Subject: [PATCH 077/187] net/smc: fix illegal rmb_desc access in SMC-D connection dump [ Upstream commit dbc153fd3c142909e564bb256da087e13fbf239c ] A crash was found when dumping SMC-D connections. It can be reproduced by following steps: - run nginx/wrk test: smc_run nginx smc_run wrk -t 16 -c 1000 -d -H 'Connection: Close' - continuously dump SMC-D connections in parallel: watch -n 1 'smcss -D' BUG: kernel NULL pointer dereference, address: 0000000000000030 CPU: 2 PID: 7204 Comm: smcss Kdump: loaded Tainted: G E 6.7.0+ #55 RIP: 0010:__smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x66/0x150 ? exc_page_fault+0x69/0x140 ? asm_exc_page_fault+0x26/0x30 ? __smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] ? __kmalloc_node_track_caller+0x35d/0x430 ? __alloc_skb+0x77/0x170 smc_diag_dump_proto+0xd0/0xf0 [smc_diag] smc_diag_dump+0x26/0x60 [smc_diag] netlink_dump+0x19f/0x320 __netlink_dump_start+0x1dc/0x300 smc_diag_handler_dump+0x6a/0x80 [smc_diag] ? __pfx_smc_diag_dump+0x10/0x10 [smc_diag] sock_diag_rcv_msg+0x121/0x140 ? __pfx_sock_diag_rcv_msg+0x10/0x10 netlink_rcv_skb+0x5a/0x110 sock_diag_rcv+0x28/0x40 netlink_unicast+0x22a/0x330 netlink_sendmsg+0x1f8/0x420 __sock_sendmsg+0xb0/0xc0 ____sys_sendmsg+0x24e/0x300 ? copy_msghdr_from_user+0x62/0x80 ___sys_sendmsg+0x7c/0xd0 ? __do_fault+0x34/0x160 ? do_read_fault+0x5f/0x100 ? do_fault+0xb0/0x110 ? __handle_mm_fault+0x2b0/0x6c0 __sys_sendmsg+0x4d/0x80 do_syscall_64+0x69/0x180 entry_SYSCALL_64_after_hwframe+0x6e/0x76 It is possible that the connection is in process of being established when we dump it. Assumed that the connection has been registered in a link group by smc_conn_create() but the rmb_desc has not yet been initialized by smc_buf_create(), thus causing the illegal access to conn->rmb_desc. So fix it by checking before dump. Fixes: 4b1b7d3b30a6 ("net/smc: add SMC-D diag support") Signed-off-by: Wen Gu Reviewed-by: Dust Li Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/smc/smc_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 801044e7d194..7a907186a33a 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -163,7 +163,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, } if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && - !list_empty(&smc->conn.lgr->list)) { + !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; From b1e0a68a0cd2a83259c444f638b417a8fffc6855 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 18 Jan 2024 09:20:19 +0800 Subject: [PATCH 078/187] tcp: make sure init the accept_queue's spinlocks once [ Upstream commit 198bc90e0e734e5f98c3d2833e8390cac3df61b2 ] When I run syz's reproduction C program locally, it causes the following issue: pvqspinlock: lock 0xffff9d181cd5c660 has corrupted value 0x0! WARNING: CPU: 19 PID: 21160 at __pv_queued_spin_unlock_slowpath (kernel/locking/qspinlock_paravirt.h:508) Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__pv_queued_spin_unlock_slowpath (kernel/locking/qspinlock_paravirt.h:508) Code: 73 56 3a ff 90 c3 cc cc cc cc 8b 05 bb 1f 48 01 85 c0 74 05 c3 cc cc cc cc 8b 17 48 89 fe 48 c7 c7 30 20 ce 8f e8 ad 56 42 ff <0f> 0b c3 cc cc cc cc 0f 0b 0f 1f 40 00 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffa8d200604cb8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff9d1ef60e0908 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff9d1ef60e0900 RBP: ffff9d181cd5c280 R08: 0000000000000000 R09: 00000000ffff7fff R10: ffffa8d200604b68 R11: ffffffff907dcdc8 R12: 0000000000000000 R13: ffff9d181cd5c660 R14: ffff9d1813a3f330 R15: 0000000000001000 FS: 00007fa110184640(0000) GS:ffff9d1ef60c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000000 CR3: 000000011f65e000 CR4: 00000000000006f0 Call Trace: _raw_spin_unlock (kernel/locking/spinlock.c:186) inet_csk_reqsk_queue_add (net/ipv4/inet_connection_sock.c:1321) inet_csk_complete_hashdance (net/ipv4/inet_connection_sock.c:1358) tcp_check_req (net/ipv4/tcp_minisocks.c:868) tcp_v4_rcv (net/ipv4/tcp_ipv4.c:2260) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205) ip_local_deliver_finish (net/ipv4/ip_input.c:234) __netif_receive_skb_one_core (net/core/dev.c:5529) process_backlog (./include/linux/rcupdate.h:779) __napi_poll (net/core/dev.c:6533) net_rx_action (net/core/dev.c:6604) __do_softirq (./arch/x86/include/asm/jump_label.h:27) do_softirq (kernel/softirq.c:454 kernel/softirq.c:441) __local_bh_enable_ip (kernel/softirq.c:381) __dev_queue_xmit (net/core/dev.c:4374) ip_finish_output2 (./include/net/neighbour.h:540 net/ipv4/ip_output.c:235) __ip_queue_xmit (net/ipv4/ip_output.c:535) __tcp_transmit_skb (net/ipv4/tcp_output.c:1462) tcp_rcv_synsent_state_process (net/ipv4/tcp_input.c:6469) tcp_rcv_state_process (net/ipv4/tcp_input.c:6657) tcp_v4_do_rcv (net/ipv4/tcp_ipv4.c:1929) __release_sock (./include/net/sock.h:1121 net/core/sock.c:2968) release_sock (net/core/sock.c:3536) inet_wait_for_connect (net/ipv4/af_inet.c:609) __inet_stream_connect (net/ipv4/af_inet.c:702) inet_stream_connect (net/ipv4/af_inet.c:748) __sys_connect (./include/linux/file.h:45 net/socket.c:2064) __x64_sys_connect (net/socket.c:2073 net/socket.c:2070 net/socket.c:2070) do_syscall_64 (arch/x86/entry/common.c:51 arch/x86/entry/common.c:82) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7fa10ff05a3d Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ab a3 0e 00 f7 d8 64 89 01 48 RSP: 002b:00007fa110183de8 EFLAGS: 00000202 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000020000054 RCX: 00007fa10ff05a3d RDX: 000000000000001c RSI: 0000000020000040 RDI: 0000000000000003 RBP: 00007fa110183e20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000202 R12: 00007fa110184640 R13: 0000000000000000 R14: 00007fa10fe8b060 R15: 00007fff73e23b20 The issue triggering process is analyzed as follows: Thread A Thread B tcp_v4_rcv //receive ack TCP packet inet_shutdown tcp_check_req tcp_disconnect //disconnect sock ... tcp_set_state(sk, TCP_CLOSE) inet_csk_complete_hashdance ... inet_csk_reqsk_queue_add inet_listen //start listen spin_lock(&queue->rskq_lock) inet_csk_listen_start ... reqsk_queue_alloc ... spin_lock_init spin_unlock(&queue->rskq_lock) //warning When the socket receives the ACK packet during the three-way handshake, it will hold spinlock. And then the user actively shutdowns the socket and listens to the socket immediately, the spinlock will be initialized. When the socket is going to release the spinlock, a warning is generated. Also the same issue to fastopenq.lock. Move init spinlock to inet_create and inet_accept to make sure init the accept_queue's spinlocks once. Fixes: fff1f3001cc5 ("tcp: add a spinlock to protect struct request_sock_queue") Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Reported-by: Ming Shu Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240118012019.1751966-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/inet_connection_sock.h | 8 ++++++++ net/core/request_sock.c | 3 --- net/ipv4/af_inet.c | 3 +++ net/ipv4/inet_connection_sock.c | 4 ++++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c2b15f7e5516..080968d6e6c5 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -346,4 +346,12 @@ static inline bool inet_csk_has_ulp(struct sock *sk) return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; } +static inline void inet_init_csk_locks(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + spin_lock_init(&icsk->icsk_accept_queue.rskq_lock); + spin_lock_init(&icsk->icsk_accept_queue.fastopenq.lock); +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/core/request_sock.c b/net/core/request_sock.c index f35c2e998406..63de5c635842 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -33,9 +33,6 @@ void reqsk_queue_alloc(struct request_sock_queue *queue) { - spin_lock_init(&queue->rskq_lock); - - spin_lock_init(&queue->fastopenq.lock); queue->fastopenq.rskq_rst_head = NULL; queue->fastopenq.rskq_rst_tail = NULL; queue->fastopenq.qlen = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c13b8ed63f87..2f646335d218 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -324,6 +324,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; + if (INET_PROTOSW_ICSK & answer_flags) + inet_init_csk_locks(sk); + inet = inet_sk(sk); inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 80ce0112e24b..79fa19a36bbd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -727,6 +727,10 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } if (req) reqsk_put(req); + + if (newsk) + inet_init_csk_locks(newsk); + return newsk; out_err: newsk = NULL; From 4ee0613868d45d40ca517221dc3171bbeb2f980c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 17 Jan 2024 15:45:11 -0800 Subject: [PATCH 079/187] bnxt_en: Wait for FLR to complete during probe [ Upstream commit 3c1069fa42872f95cf3c6fedf80723d391e12d57 ] The first message to firmware may fail if the device is undergoing FLR. The driver has some recovery logic for this failure scenario but we must wait 100 msec for FLR to complete before proceeding. Otherwise the recovery will always fail. Fixes: ba02629ff6cb ("bnxt_en: log firmware status on firmware init failure") Reviewed-by: Damodharam Ammepalli Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240117234515.226944-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index df4d88d35701..f810b5dc25f0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12269,6 +12269,11 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp) bp->fw_cap = 0; rc = bnxt_hwrm_ver_get(bp); + /* FW may be unresponsive after FLR. FLR must complete within 100 msec + * so wait before continuing with recovery. + */ + if (rc) + msleep(100); bnxt_try_map_fw_health_reg(bp); if (rc) { rc = bnxt_try_recover_fw(bp); From c5e7fa4f9da43975a2182c4f271bae27c1b3dcbe Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 18 Jan 2024 21:03:06 +0800 Subject: [PATCH 080/187] vlan: skip nested type that is not IFLA_VLAN_QOS_MAPPING [ Upstream commit 6c21660fe221a15c789dee2bc2fd95516bc5aeaf ] In the vlan_changelink function, a loop is used to parse the nested attributes IFLA_VLAN_EGRESS_QOS and IFLA_VLAN_INGRESS_QOS in order to obtain the struct ifla_vlan_qos_mapping. These two nested attributes are checked in the vlan_validate_qos_map function, which calls nla_validate_nested_deprecated with the vlan_map_policy. However, this deprecated validator applies a LIBERAL strictness, allowing the presence of an attribute with the type IFLA_VLAN_QOS_UNSPEC. Consequently, the loop in vlan_changelink may parse an attribute of type IFLA_VLAN_QOS_UNSPEC and believe it carries a payload of struct ifla_vlan_qos_mapping, which is not necessarily true. To address this issue and ensure compatibility, this patch introduces two type checks that skip attributes whose type is not IFLA_VLAN_QOS_MAPPING. Fixes: 07b5b17e157b ("[VLAN]: Use rtnl_link API") Signed-off-by: Lin Ma Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240118130306.1644001-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/8021q/vlan_netlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 214532173536..a3b68243fd4b 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -118,12 +118,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); vlan_dev_set_ingress_priority(dev, m->to, m->from); } } if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); err = vlan_dev_set_egress_priority(dev, m->from, m->to); if (err) From 6d53b813ff8b177f86f149c2f744442681f720e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jan 2024 18:36:25 +0000 Subject: [PATCH 081/187] llc: make llc_ui_sendmsg() more robust against bonding changes [ Upstream commit dad555c816a50c6a6a8a86be1f9177673918c647 ] syzbot was able to trick llc_ui_sendmsg(), allocating an skb with no headroom, but subsequently trying to push 14 bytes of Ethernet header [1] Like some others, llc_ui_sendmsg() releases the socket lock before calling sock_alloc_send_skb(). Then it acquires it again, but does not redo all the sanity checks that were performed. This fix: - Uses LL_RESERVED_SPACE() to reserve space. - Check all conditions again after socket lock is held again. - Do not account Ethernet header for mtu limitation. [1] skbuff: skb_under_panic: text:ffff800088baa334 len:1514 put:14 head:ffff0000c9c37000 data:ffff0000c9c36ff2 tail:0x5dc end:0x6c0 dev:bond0 kernel BUG at net/core/skbuff.c:193 ! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 6875 Comm: syz-executor.0 Not tainted 6.7.0-rc8-syzkaller-00101-g0802e17d9aca-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_panic net/core/skbuff.c:189 [inline] pc : skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 lr : skb_panic net/core/skbuff.c:189 [inline] lr : skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 sp : ffff800096f97000 x29: ffff800096f97010 x28: ffff80008cc8d668 x27: dfff800000000000 x26: ffff0000cb970c90 x25: 00000000000005dc x24: ffff0000c9c36ff2 x23: ffff0000c9c37000 x22: 00000000000005ea x21: 00000000000006c0 x20: 000000000000000e x19: ffff800088baa334 x18: 1fffe000368261ce x17: ffff80008e4ed000 x16: ffff80008a8310f8 x15: 0000000000000001 x14: 1ffff00012df2d58 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000001 x10: 0000000000ff0100 x9 : e28a51f1087e8400 x8 : e28a51f1087e8400 x7 : ffff80008028f8d0 x6 : 0000000000000000 x5 : 0000000000000001 x4 : 0000000000000001 x3 : ffff800082b78714 x2 : 0000000000000001 x1 : 0000000100000000 x0 : 0000000000000089 Call trace: skb_panic net/core/skbuff.c:189 [inline] skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 skb_push+0xf0/0x108 net/core/skbuff.c:2451 eth_header+0x44/0x1f8 net/ethernet/eth.c:83 dev_hard_header include/linux/netdevice.h:3188 [inline] llc_mac_hdr_init+0x110/0x17c net/llc/llc_output.c:33 llc_sap_action_send_xid_c+0x170/0x344 net/llc/llc_s_ac.c:85 llc_exec_sap_trans_actions net/llc/llc_sap.c:153 [inline] llc_sap_next_state net/llc/llc_sap.c:182 [inline] llc_sap_state_process+0x1ec/0x774 net/llc/llc_sap.c:209 llc_build_and_send_xid_pkt+0x12c/0x1c0 net/llc/llc_sap.c:270 llc_ui_sendmsg+0x7bc/0xb1c net/llc/af_llc.c:997 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_sendmsg+0x194/0x274 net/socket.c:767 splice_to_socket+0x7cc/0xd58 fs/splice.c:881 do_splice_from fs/splice.c:933 [inline] direct_splice_actor+0xe4/0x1c0 fs/splice.c:1142 splice_direct_to_actor+0x2a0/0x7e4 fs/splice.c:1088 do_splice_direct+0x20c/0x348 fs/splice.c:1194 do_sendfile+0x4bc/0xc70 fs/read_write.c:1254 __do_sys_sendfile64 fs/read_write.c:1322 [inline] __se_sys_sendfile64 fs/read_write.c:1308 [inline] __arm64_sys_sendfile64+0x160/0x3b4 fs/read_write.c:1308 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x54/0x158 arch/arm64/kernel/entry-common.c:678 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:595 Code: aa1803e6 aa1903e7 a90023f5 94792f6a (d4210000) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+2a7024e9502df538e8ef@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240118183625.4007013-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/llc/af_llc.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 9ffbc667be6c..19c478bd85bd 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -928,14 +928,15 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, */ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { + DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; + int rc = -EINVAL, copied = 0, hdrlen, hh_len; struct sk_buff *skb = NULL; + struct net_device *dev; size_t size = 0; - int rc = -EINVAL, copied = 0, hdrlen; dprintk("%s: sending from %02X to %02X\n", __func__, llc->laddr.lsap, llc->daddr.lsap); @@ -955,22 +956,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (rc) goto out; } - hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); + dev = llc->dev; + hh_len = LL_RESERVED_SPACE(dev); + hdrlen = llc_ui_header_len(sk, addr); size = hdrlen + len; - if (size > llc->dev->mtu) - size = llc->dev->mtu; + size = min_t(size_t, size, READ_ONCE(dev->mtu)); copied = size - hdrlen; rc = -EINVAL; if (copied < 0) goto out; release_sock(sk); - skb = sock_alloc_send_skb(sk, size, noblock, &rc); + skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc); lock_sock(sk); if (!skb) goto out; - skb->dev = llc->dev; + if (sock_flag(sk, SOCK_ZAPPED) || + llc->dev != dev || + hdrlen != llc_ui_header_len(sk, addr) || + hh_len != LL_RESERVED_SPACE(dev) || + size > READ_ONCE(dev->mtu)) + goto out; + skb->dev = dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); - skb_reserve(skb, hdrlen); + skb_reserve(skb, hh_len + hdrlen); rc = memcpy_from_msg(skb_put(skb, copied), msg, copied); if (rc) goto out; From 660c3053d992b68fee893a0e9ec9159228cffdc6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 18 Jan 2024 17:55:15 -0800 Subject: [PATCH 082/187] llc: Drop support for ETH_P_TR_802_2. [ Upstream commit e3f9bed9bee261e3347131764e42aeedf1ffea61 ] syzbot reported an uninit-value bug below. [0] llc supports ETH_P_802_2 (0x0004) and used to support ETH_P_TR_802_2 (0x0011), and syzbot abused the latter to trigger the bug. write$tun(r0, &(0x7f0000000040)={@val={0x0, 0x11}, @val, @mpls={[], @llc={@snap={0xaa, 0x1, ')', "90e5dd"}}}}, 0x16) llc_conn_handler() initialises local variables {saddr,daddr}.mac based on skb in llc_pdu_decode_sa()/llc_pdu_decode_da() and passes them to __llc_lookup(). However, the initialisation is done only when skb->protocol is htons(ETH_P_802_2), otherwise, __llc_lookup_established() and __llc_lookup_listener() will read garbage. The missing initialisation existed prior to commit 211ed865108e ("net: delete all instances of special processing for token ring"). It removed the part to kick out the token ring stuff but forgot to close the door allowing ETH_P_TR_802_2 packets to sneak into llc_rcv(). Let's remove llc_tr_packet_type and complete the deprecation. [0]: BUG: KMSAN: uninit-value in __llc_lookup_established+0xe9d/0xf90 __llc_lookup_established+0xe9d/0xf90 __llc_lookup net/llc/llc_conn.c:611 [inline] llc_conn_handler+0x4bd/0x1360 net/llc/llc_conn.c:791 llc_rcv+0xfbb/0x14a0 net/llc/llc_input.c:206 __netif_receive_skb_one_core net/core/dev.c:5527 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5641 netif_receive_skb_internal net/core/dev.c:5727 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5786 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1555 tun_get_user+0x53af/0x66d0 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2020 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x1490 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Local variable daddr created at: llc_conn_handler+0x53/0x1360 net/llc/llc_conn.c:783 llc_rcv+0xfbb/0x14a0 net/llc/llc_input.c:206 CPU: 1 PID: 5004 Comm: syz-executor994 Not tainted 6.6.0-syzkaller-14500-g1c41041124bd #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Fixes: 211ed865108e ("net: delete all instances of special processing for token ring") Reported-by: syzbot+b5ad66046b913bc04c6f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b5ad66046b913bc04c6f Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240119015515.61898-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/llc_pdu.h | 6 ++---- net/llc/llc_core.c | 7 ------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 49aa79c7b278..581cd37aa98b 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, */ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); + memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); } /** @@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) */ static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); + memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); } /** diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 6e387aadffce..4f16d9c88350 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = { .func = llc_rcv, }; -static struct packet_type llc_tr_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TR_802_2), - .func = llc_rcv, -}; - static int __init llc_init(void) { dev_add_pack(&llc_packet_type); - dev_add_pack(&llc_tr_packet_type); return 0; } static void __exit llc_exit(void) { dev_remove_pack(&llc_packet_type); - dev_remove_pack(&llc_tr_packet_type); } module_init(llc_init); From 6646145be9085fc85310323d41196fa7fad0b189 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jan 2024 20:17:49 +0000 Subject: [PATCH 083/187] udp: fix busy polling [ Upstream commit a54d51fb2dfb846aedf3751af501e9688db447f5 ] Generic sk_busy_loop_end() only looks at sk->sk_receive_queue for presence of packets. Problem is that for UDP sockets after blamed commit, some packets could be present in another queue: udp_sk(sk)->reader_queue In some cases, a busy poller could spin until timeout expiration, even if some packets are available in udp_sk(sk)->reader_queue. v3: - make sk_busy_loop_end() nicer (Willem) v2: - add a READ_ONCE(sk->sk_family) in sk_is_inet() to avoid KCSAN splats. - add a sk_is_inet() check in sk_is_udp() (Willem feedback) - add a sk_is_inet() check in sk_is_tcp(). Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception") Signed-off-by: Eric Dumazet Reviewed-by: Paolo Abeni Reviewed-by: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/skmsg.h | 6 ------ include/net/inet_sock.h | 5 ----- include/net/sock.h | 18 +++++++++++++++++- net/core/sock.c | 11 +++++++++-- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c953b8c0d2f4..bd4418377bac 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -500,12 +500,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } -static inline bool sk_is_udp(const struct sock *sk) -{ - return sk->sk_type == SOCK_DGRAM && - sk->sk_protocol == IPPROTO_UDP; -} - #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c2432c2addc8..c98890e21401 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -270,11 +270,6 @@ struct inet_sock { #define IP_CMSG_CHECKSUM BIT(7) #define IP_CMSG_RECVFRAGSIZE BIT(8) -static inline bool sk_is_inet(struct sock *sk) -{ - return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; -} - /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket diff --git a/include/net/sock.h b/include/net/sock.h index 6b51e85ae69e..579732d47dfc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2824,9 +2824,25 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +static inline bool sk_is_inet(const struct sock *sk) +{ + int family = READ_ONCE(sk->sk_family); + + return family == AF_INET || family == AF_INET6; +} + static inline bool sk_is_tcp(const struct sock *sk) { - return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; + return sk_is_inet(sk) && + sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP; +} + +static inline bool sk_is_udp(const struct sock *sk) +{ + return sk_is_inet(sk) && + sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; } static inline bool sk_is_stream_unix(const struct sock *sk) diff --git a/net/core/sock.c b/net/core/sock.c index c50a14a02edd..c8803b95ea0d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -4109,8 +4110,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; - return !skb_queue_empty_lockless(&sk->sk_receive_queue) || - sk_busy_loop_timeout(sk, start_time); + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + return true; + + if (sk_is_udp(sk) && + !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) + return true; + + return sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); #endif /* CONFIG_NET_RX_BUSY_POLL */ From a2232f29bf52c24f827865b3c90829c44b6c695b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 18 Jan 2024 16:58:59 -0800 Subject: [PATCH 084/187] net: fix removing a namespace with conflicting altnames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d09486a04f5da0a812c26217213b89a3b1acf836 ] Mark reports a BUG() when a net namespace is removed. kernel BUG at net/core/dev.c:11520! Physical interfaces moved outside of init_net get "refunded" to init_net when that namespace disappears. The main interface name may get overwritten in the process if it would have conflicted. We need to also discard all conflicting altnames. Recent fixes addressed ensuring that altnames get moved with the main interface, which surfaced this problem. Reported-by: Марк Коренберг Link: https://lore.kernel.org/all/CAEmTpZFZ4Sv3KwqFOY2WKDHeZYdi0O7N5H1nTvcGp=SAEavtDg@mail.gmail.com/ Fixes: 7663d522099e ("net: check for altname conflicts when changing netdev's netns") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Jiri Pirko Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/dev.c | 9 +++++++++ net/core/dev.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 0a5566b6f8a2..1ba3662faf0a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11323,6 +11323,7 @@ static struct pernet_operations __net_initdata netdev_net_ops = { static void __net_exit default_device_exit_net(struct net *net) { + struct netdev_name_node *name_node, *tmp; struct net_device *dev, *aux; /* * Push all migratable network devices back to the @@ -11345,6 +11346,14 @@ static void __net_exit default_device_exit_net(struct net *net) snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); if (netdev_name_in_use(&init_net, fb_name)) snprintf(fb_name, IFNAMSIZ, "dev%%d"); + + netdev_for_each_altname_safe(dev, name_node, tmp) + if (netdev_name_in_use(&init_net, name_node->name)) { + netdev_name_node_del(name_node); + synchronize_rcu(); + __netdev_name_node_alt_destroy(name_node); + } + err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { pr_emerg("%s: failed to move %s to init_net: %d\n", diff --git a/net/core/dev.h b/net/core/dev.h index 9ca91457c197..db9ff8cd8d46 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -63,6 +63,9 @@ int dev_change_name(struct net_device *dev, const char *newname); #define netdev_for_each_altname(dev, namenode) \ list_for_each_entry((namenode), &(dev)->name_node->list, list) +#define netdev_for_each_altname_safe(dev, namenode, next) \ + list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, \ + list) int netdev_name_node_alt_create(struct net_device *dev, const char *name); int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); From 41e7decdad0427016fe7179e2e5db484ffa9cd3c Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 19 Jan 2024 18:22:35 +0800 Subject: [PATCH 085/187] tun: fix missing dropped counter in tun_xdp_act [ Upstream commit 5744ba05e7c4bff8fec133dd0f9e51ddffba92f5 ] The commit 8ae1aff0b331 ("tuntap: split out XDP logic") includes dropped counter for XDP_DROP, XDP_ABORTED, and invalid XDP actions. Unfortunately, that commit missed the dropped counter when error occurs during XDP_TX and XDP_REDIRECT actions. This patch fixes this issue. Fixes: 8ae1aff0b331 ("tuntap: split out XDP logic") Signed-off-by: Yunjian Wang Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/tun.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d373953ddc30..af32aa599278 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1622,13 +1622,17 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, switch (act) { case XDP_REDIRECT: err = xdp_do_redirect(tun->dev, xdp, xdp_prog); - if (err) + if (err) { + dev_core_stats_rx_dropped_inc(tun->dev); return err; + } break; case XDP_TX: err = tun_xdp_tx(tun->dev, xdp); - if (err < 0) + if (err < 0) { + dev_core_stats_rx_dropped_inc(tun->dev); return err; + } break; case XDP_PASS: break; From 7a581f597a7887da70558a66eaca0373c77d0e68 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 19 Jan 2024 18:22:56 +0800 Subject: [PATCH 086/187] tun: add missing rx stats accounting in tun_xdp_act [ Upstream commit f1084c427f55d573fcd5688d9ba7b31b78019716 ] The TUN can be used as vhost-net backend, and it is necessary to count the packets transmitted from TUN to vhost-net/virtio-net. However, there are some places in the receive path that were not taken into account when using XDP. It would be beneficial to also include new accounting for successfully received bytes using dev_sw_netstats_rx_add. Fixes: 761876c857cb ("tap: XDP support") Signed-off-by: Yunjian Wang Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index af32aa599278..367255bb44cd 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1626,6 +1626,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, dev_core_stats_rx_dropped_inc(tun->dev); return err; } + dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_TX: err = tun_xdp_tx(tun->dev, xdp); @@ -1633,6 +1634,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, dev_core_stats_rx_dropped_inc(tun->dev); return err; } + dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_PASS: break; From fcb0b4b6bc72159b0316431a8b2287faa3d3f423 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 19 Jan 2024 11:47:50 +0100 Subject: [PATCH 087/187] net: micrel: Fix PTP frame parsing for lan8814 [ Upstream commit aaf632f7ab6dec57bc9329a438f94504fe8034b9 ] The HW has the capability to check each frame if it is a PTP frame, which domain it is, which ptp frame type it is, different ip address in the frame. And if one of these checks fail then the frame is not timestamp. Most of these checks were disabled except checking the field minorVersionPTP inside the PTP header. Meaning that once a partner sends a frame compliant to 8021AS which has minorVersionPTP set to 1, then the frame was not timestamp because the HW expected by default a value of 0 in minorVersionPTP. This is exactly the same issue as on lan8841. Fix this issue by removing this check so the userspace can decide on this. Fixes: ece19502834d ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Horatiu Vultur Reviewed-by: Maxime Chevallier Reviewed-by: Divya Koppera Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/micrel.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 7cbcf51bae92..9481f172830f 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -120,6 +120,11 @@ */ #define LAN8814_1PPM_FORMAT 17179 +#define PTP_RX_VERSION 0x0248 +#define PTP_TX_VERSION 0x0288 +#define PTP_MAX_VERSION(x) (((x) & GENMASK(7, 0)) << 8) +#define PTP_MIN_VERSION(x) ((x) & GENMASK(7, 0)) + #define PTP_RX_MOD 0x024F #define PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_ BIT(3) #define PTP_RX_TIMESTAMP_EN 0x024D @@ -2922,6 +2927,12 @@ static void lan8814_ptp_init(struct phy_device *phydev) lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_IP_ADDR_EN, 0); lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_IP_ADDR_EN, 0); + /* Disable checking for minorVersionPTP field */ + lanphy_write_page_reg(phydev, 5, PTP_RX_VERSION, + PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); + lanphy_write_page_reg(phydev, 5, PTP_TX_VERSION, + PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); + skb_queue_head_init(&ptp_priv->tx_queue); skb_queue_head_init(&ptp_priv->rx_queue); INIT_LIST_HEAD(&ptp_priv->rx_ts_list); From 71024928b3f71ce4529426f8692943205c58d30b Mon Sep 17 00:00:00 2001 From: Sharath Srinivasan Date: Fri, 19 Jan 2024 17:48:39 -0800 Subject: [PATCH 088/187] net/rds: Fix UBSAN: array-index-out-of-bounds in rds_cmsg_recv [ Upstream commit 13e788deb7348cc88df34bed736c3b3b9927ea52 ] Syzcaller UBSAN crash occurs in rds_cmsg_recv(), which reads inc->i_rx_lat_trace[j + 1] with index 4 (3 + 1), but with array size of 4 (RDS_RX_MAX_TRACES). Here 'j' is assigned from rs->rs_rx_trace[i] and in-turn from trace.rx_trace_pos[i] in rds_recv_track_latency(), with both arrays sized 3 (RDS_MSG_RX_DGRAM_TRACE_MAX). So fix the off-by-one bounds check in rds_recv_track_latency() to prevent a potential crash in rds_cmsg_recv(). Found by syzcaller: ================================================================= UBSAN: array-index-out-of-bounds in net/rds/recv.c:585:39 index 4 is out of range for type 'u64 [4]' CPU: 1 PID: 8058 Comm: syz-executor228 Not tainted 6.6.0-gd2f51b3516da #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x136/0x150 lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:217 [inline] __ubsan_handle_out_of_bounds+0xd5/0x130 lib/ubsan.c:348 rds_cmsg_recv+0x60d/0x700 net/rds/recv.c:585 rds_recvmsg+0x3fb/0x1610 net/rds/recv.c:716 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg+0xe2/0x160 net/socket.c:1066 __sys_recvfrom+0x1b6/0x2f0 net/socket.c:2246 __do_sys_recvfrom net/socket.c:2264 [inline] __se_sys_recvfrom net/socket.c:2260 [inline] __x64_sys_recvfrom+0xe0/0x1b0 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b ================================================================== Fixes: 3289025aedc0 ("RDS: add receive message trace used by application") Reported-by: Chenyuan Yang Closes: https://lore.kernel.org/linux-rdma/CALGdzuoVdq-wtQ4Az9iottBqC5cv9ZhcE5q8N7LfYFvkRsOVcw@mail.gmail.com/ Signed-off-by: Sharath Srinivasan Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rds/af_rds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 3ff6995244e5..d107f7605db4 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval, rs->rs_rx_traces = trace.rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { - if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { + if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) { rs->rs_rx_traces = 0; return -EFAULT; } From 82a9bc343ba019665d3ddc1d9a180bf0e0390cf3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jan 2024 09:59:41 +0300 Subject: [PATCH 089/187] netfs, fscache: Prevent Oops in fscache_put_cache() [ Upstream commit 3be0b3ed1d76c6703b9ee482b55f7e01c369cc68 ] This function dereferences "cache" and then checks if it's IS_ERR_OR_NULL(). Check first, then dereference. Fixes: 9549332df4ed ("fscache: Implement cache registration") Signed-off-by: Dan Carpenter Signed-off-by: David Howells Link: https://lore.kernel.org/r/e84bc740-3502-4f16-982a-a40d5676615c@moroto.mountain/ # v2 Signed-off-by: Sasha Levin --- fs/fscache/cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index d645f8b302a2..9397ed39b0b4 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -179,13 +179,14 @@ EXPORT_SYMBOL(fscache_acquire_cache); void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where) { - unsigned int debug_id = cache->debug_id; + unsigned int debug_id; bool zero; int ref; if (IS_ERR_OR_NULL(cache)) return; + debug_id = cache->debug_id; zero = __refcount_dec_and_test(&cache->ref, &ref); trace_fscache_cache(debug_id, ref - 1, where); From f4f7e696db0274ff560482cc52eddbf0551d4b7a Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 22 Jan 2024 16:09:28 +0100 Subject: [PATCH 090/187] tracing: Ensure visibility when inserting an element into tracing_map [ Upstream commit 2b44760609e9eaafc9d234a6883d042fc21132a7 ] Running the following two commands in parallel on a multi-processor AArch64 machine can sporadically produce an unexpected warning about duplicate histogram entries: $ while true; do echo hist:key=id.syscall:val=hitcount > \ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist sleep 0.001 done $ stress-ng --sysbadaddr $(nproc) The warning looks as follows: [ 2911.172474] ------------[ cut here ]------------ [ 2911.173111] Duplicates detected: 1 [ 2911.173574] WARNING: CPU: 2 PID: 12247 at kernel/trace/tracing_map.c:983 tracing_map_sort_entries+0x3e0/0x408 [ 2911.174702] Modules linked in: iscsi_ibft(E) iscsi_boot_sysfs(E) rfkill(E) af_packet(E) nls_iso8859_1(E) nls_cp437(E) vfat(E) fat(E) ena(E) tiny_power_button(E) qemu_fw_cfg(E) button(E) fuse(E) efi_pstore(E) ip_tables(E) x_tables(E) xfs(E) libcrc32c(E) aes_ce_blk(E) aes_ce_cipher(E) crct10dif_ce(E) polyval_ce(E) polyval_generic(E) ghash_ce(E) gf128mul(E) sm4_ce_gcm(E) sm4_ce_ccm(E) sm4_ce(E) sm4_ce_cipher(E) sm4(E) sm3_ce(E) sm3(E) sha3_ce(E) sha512_ce(E) sha512_arm64(E) sha2_ce(E) sha256_arm64(E) nvme(E) sha1_ce(E) nvme_core(E) nvme_auth(E) t10_pi(E) sg(E) scsi_mod(E) scsi_common(E) efivarfs(E) [ 2911.174738] Unloaded tainted modules: cppc_cpufreq(E):1 [ 2911.180985] CPU: 2 PID: 12247 Comm: cat Kdump: loaded Tainted: G E 6.7.0-default #2 1b58bbb22c97e4399dc09f92d309344f69c44a01 [ 2911.182398] Hardware name: Amazon EC2 c7g.8xlarge/, BIOS 1.0 11/1/2018 [ 2911.183208] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 2911.184038] pc : tracing_map_sort_entries+0x3e0/0x408 [ 2911.184667] lr : tracing_map_sort_entries+0x3e0/0x408 [ 2911.185310] sp : ffff8000a1513900 [ 2911.185750] x29: ffff8000a1513900 x28: ffff0003f272fe80 x27: 0000000000000001 [ 2911.186600] x26: ffff0003f272fe80 x25: 0000000000000030 x24: 0000000000000008 [ 2911.187458] x23: ffff0003c5788000 x22: ffff0003c16710c8 x21: ffff80008017f180 [ 2911.188310] x20: ffff80008017f000 x19: ffff80008017f180 x18: ffffffffffffffff [ 2911.189160] x17: 0000000000000000 x16: 0000000000000000 x15: ffff8000a15134b8 [ 2911.190015] x14: 0000000000000000 x13: 205d373432323154 x12: 5b5d313131333731 [ 2911.190844] x11: 00000000fffeffff x10: 00000000fffeffff x9 : ffffd1b78274a13c [ 2911.191716] x8 : 000000000017ffe8 x7 : c0000000fffeffff x6 : 000000000057ffa8 [ 2911.192554] x5 : ffff0012f6c24ec0 x4 : 0000000000000000 x3 : ffff2e5b72b5d000 [ 2911.193404] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0003ff254480 [ 2911.194259] Call trace: [ 2911.194626] tracing_map_sort_entries+0x3e0/0x408 [ 2911.195220] hist_show+0x124/0x800 [ 2911.195692] seq_read_iter+0x1d4/0x4e8 [ 2911.196193] seq_read+0xe8/0x138 [ 2911.196638] vfs_read+0xc8/0x300 [ 2911.197078] ksys_read+0x70/0x108 [ 2911.197534] __arm64_sys_read+0x24/0x38 [ 2911.198046] invoke_syscall+0x78/0x108 [ 2911.198553] el0_svc_common.constprop.0+0xd0/0xf8 [ 2911.199157] do_el0_svc+0x28/0x40 [ 2911.199613] el0_svc+0x40/0x178 [ 2911.200048] el0t_64_sync_handler+0x13c/0x158 [ 2911.200621] el0t_64_sync+0x1a8/0x1b0 [ 2911.201115] ---[ end trace 0000000000000000 ]--- The problem appears to be caused by CPU reordering of writes issued from __tracing_map_insert(). The check for the presence of an element with a given key in this function is: val = READ_ONCE(entry->val); if (val && keys_match(key, val->key, map->key_size)) ... The write of a new entry is: elt = get_free_elt(map); memcpy(elt->key, key, map->key_size); entry->val = elt; The "memcpy(elt->key, key, map->key_size);" and "entry->val = elt;" stores may become visible in the reversed order on another CPU. This second CPU might then incorrectly determine that a new key doesn't match an already present val->key and subsequently insert a new element, resulting in a duplicate. Fix the problem by adding a write barrier between "memcpy(elt->key, key, map->key_size);" and "entry->val = elt;", and for good measure, also use WRITE_ONCE(entry->val, elt) for publishing the element. The sequence pairs with the mentioned "READ_ONCE(entry->val);" and the "val->key" check which has an address dependency. The barrier is placed on a path executed when adding an element for a new key. Subsequent updates targeting the same key remain unaffected. From the user's perspective, the issue was introduced by commit c193707dde77 ("tracing: Remove code which merges duplicates"), which followed commit cbf4100efb8f ("tracing: Add support to detect and avoid duplicates"). The previous code operated differently; it inherently expected potential races which result in duplicates but merged them later when they occurred. Link: https://lore.kernel.org/linux-trace-kernel/20240122150928.27725-1-petr.pavlu@suse.com Fixes: c193707dde77 ("tracing: Remove code which merges duplicates") Signed-off-by: Petr Pavlu Acked-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/tracing_map.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index c774e560f2f9..a4dcf0f24352 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) } memcpy(elt->key, key, map->key_size); - entry->val = elt; + /* + * Ensure the initialization is visible and + * publish the elt. + */ + smp_wmb(); + WRITE_ONCE(entry->val, elt); atomic64_inc(&map->hits); return entry->val; From ab49164c60803d5f637fa9643270db9f459d852c Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Jan 2024 17:22:36 +0000 Subject: [PATCH 091/187] afs: Hide silly-rename files from userspace [ Upstream commit 57e9d49c54528c49b8bffe6d99d782ea051ea534 ] There appears to be a race between silly-rename files being created/removed and various userspace tools iterating over the contents of a directory, leading to such errors as: find: './kernel/.tmp_cpio_dir/include/dt-bindings/reset/.__afs2080': No such file or directory tar: ./include/linux/greybus/.__afs3C95: File removed before we read it when building a kernel. Fix afs_readdir() so that it doesn't return .__afsXXXX silly-rename files to userspace. This doesn't stop them being looked up directly by name as we need to be able to look them up from within the kernel as part of the silly-rename algorithm. Fixes: 79ddbfa500b3 ("afs: Implement sillyrename for unlink and rename") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Sasha Levin --- fs/afs/dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 07dc4ec73520..cf811b77ee67 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -473,6 +473,14 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, continue; } + /* Don't expose silly rename entries to userspace. */ + if (nlen > 6 && + dire->u.name[0] == '.' && + ctx->actor != afs_lookup_filldir && + ctx->actor != afs_lookup_one_filldir && + memcmp(dire->u.name, ".__afs", 6) == 0) + continue; + /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, ntohl(dire->u.vnode), From 90fba981cacb70a421809b4055d6c38ee133ff6a Mon Sep 17 00:00:00 2001 From: Salvatore Dipietro Date: Fri, 19 Jan 2024 11:01:33 -0800 Subject: [PATCH 092/187] tcp: Add memory barrier to tcp_push() [ Upstream commit 7267e8dcad6b2f9fce05a6a06335d7040acbc2b6 ] On CPUs with weak memory models, reads and updates performed by tcp_push to the sk variables can get reordered leaving the socket throttled when it should not. The tasklet running tcp_wfree() may also not observe the memory updates in time and will skip flushing any packets throttled by tcp_push(), delaying the sending. This can pathologically cause 40ms extra latency due to bad interactions with delayed acks. Adding a memory barrier in tcp_push removes the bug, similarly to the previous commit bf06200e732d ("tcp: tsq: fix nonagle handling"). smp_mb__after_atomic() is used to not incur in unnecessary overhead on x86 since not affected. Patch has been tested using an AWS c7g.2xlarge instance with Ubuntu 22.04 and Apache Tomcat 9.0.83 running the basic servlet below: import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; public class HelloWorldServlet extends HttpServlet { @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=utf-8"); OutputStreamWriter osw = new OutputStreamWriter(response.getOutputStream(),"UTF-8"); String s = "a".repeat(3096); osw.write(s,0,s.length()); osw.flush(); } } Load was applied using wrk2 (https://github.com/kinvolk/wrk2) from an AWS c6i.8xlarge instance. Before the patch an additional 40ms latency from P99.99+ values is observed while, with the patch, the extra latency disappears. No patch and tcp_autocorking=1 ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.60.173:8080/hello/hello ... 50.000% 0.91ms 75.000% 1.13ms 90.000% 1.46ms 99.000% 1.74ms 99.900% 1.89ms 99.990% 41.95ms <<< 40+ ms extra latency 99.999% 48.32ms 100.000% 48.96ms With patch and tcp_autocorking=1 ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.60.173:8080/hello/hello ... 50.000% 0.90ms 75.000% 1.13ms 90.000% 1.45ms 99.000% 1.72ms 99.900% 1.83ms 99.990% 2.11ms <<< no 40+ ms extra latency 99.999% 2.53ms 100.000% 2.62ms Patch has been also tested on x86 (m7i.2xlarge instance) which it is not affected by this issue and the patch doesn't introduce any additional delay. Fixes: 7aa5470c2c09 ("tcp: tsq: move tsq_flags close to sk_wmem_alloc") Signed-off-by: Salvatore Dipietro Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240119190133.43698-1-dipiets@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0b7844a8d571..90e24c3f6557 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -718,6 +718,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now, if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); + smp_mb__after_atomic(); } /* It is possible TX completion already happened * before we set TSQ_THROTTLED. From de061604f83419f8f59f4d352efc9a772607b7ce Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 22 Jan 2024 09:18:07 +0800 Subject: [PATCH 093/187] netlink: fix potential sleeping issue in mqueue_flush_file [ Upstream commit 234ec0b6034b16869d45128b8cd2dc6ffe596f04 ] I analyze the potential sleeping issue of the following processes: Thread A Thread B ... netlink_create //ref = 1 do_mq_notify ... sock = netlink_getsockbyfilp ... //ref = 2 info->notify_sock = sock; ... ... netlink_sendmsg ... skb = netlink_alloc_large_skb //skb->head is vmalloced ... netlink_unicast ... sk = netlink_getsockbyportid //ref = 3 ... netlink_sendskb ... __netlink_sendskb ... skb_queue_tail //put skb to sk_receive_queue ... sock_put //ref = 2 ... ... ... netlink_release ... deferred_put_nlk_sk //ref = 1 mqueue_flush_file spin_lock remove_notification netlink_sendskb sock_put //ref = 0 sk_free ... __sk_destruct netlink_sock_destruct skb_queue_purge //get skb from sk_receive_queue ... __skb_queue_purge_reason kfree_skb_reason __kfree_skb ... skb_release_all skb_release_head_state netlink_skb_destructor vfree(skb->head) //sleeping while holding spinlock In netlink_sendmsg, if the memory pointed to by skb->head is allocated by vmalloc, and is put to sk_receive_queue queue, also the skb is not freed. When the mqueue executes flush, the sleeping bug will occur. Use vfree_atomic instead of vfree in netlink_skb_destructor to solve the issue. Fixes: c05cdb1b864f ("netlink: allow large data transfers from user-space") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20240122011807.2110357-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index cb833302270a..6857a4965fe8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb) if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) - vfree(skb->head); + vfree_atomic(skb->head); skb->head = NULL; } From f11792c3899c8d4a287ca02ad9c4745ac9d0fd6f Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 22 Jan 2024 18:20:01 +0800 Subject: [PATCH 094/187] ipv6: init the accept_queue's spinlocks in inet6_create [ Upstream commit 435e202d645c197dcfd39d7372eb2a56529b6640 ] In commit 198bc90e0e73("tcp: make sure init the accept_queue's spinlocks once"), the spinlocks of accept_queue are initialized only when socket is created in the inet4 scenario. The locks are not initialized when socket is created in the inet6 scenario. The kernel reports the following error: INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Call Trace: dump_stack_lvl (lib/dump_stack.c:107) register_lock_class (kernel/locking/lockdep.c:1289) __lock_acquire (kernel/locking/lockdep.c:5015) lock_acquire.part.0 (kernel/locking/lockdep.c:5756) _raw_spin_lock_bh (kernel/locking/spinlock.c:178) inet_csk_listen_stop (net/ipv4/inet_connection_sock.c:1386) tcp_disconnect (net/ipv4/tcp.c:2981) inet_shutdown (net/ipv4/af_inet.c:935) __sys_shutdown (./include/linux/file.h:32 net/socket.c:2438) __x64_sys_shutdown (net/socket.c:2445) do_syscall_64 (arch/x86/entry/common.c:52) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f52ecd05a3d Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ab a3 0e 00 f7 d8 64 89 01 48 RSP: 002b:00007f52ecf5dde8 EFLAGS: 00000293 ORIG_RAX: 0000000000000030 RAX: ffffffffffffffda RBX: 00007f52ecf5e640 RCX: 00007f52ecd05a3d RDX: 00007f52ecc8b188 RSI: 0000000000000000 RDI: 0000000000000004 RBP: 00007f52ecf5de20 R08: 00007ffdae45c69f R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 00007f52ecf5e640 R13: 0000000000000000 R14: 00007f52ecc8b060 R15: 00007ffdae45c6e0 Fixes: 198bc90e0e73 ("tcp: make sure init the accept_queue's spinlocks once") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240122102001.2851701-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/af_inet6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a2f29ca51600..0b42eb8c55aa 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -199,6 +199,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; + if (INET_PROTOSW_ICSK & answer_flags) + inet_init_csk_locks(sk); + inet = inet_sk(sk); inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; From e54aedd4d0e5bf6a6bf6be89169dbf2eed72170d Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Dec 2023 11:24:08 +0200 Subject: [PATCH 095/187] net/mlx5: DR, Use the right GVMI number for drop action [ Upstream commit 5665954293f13642f9c052ead83c1e9d8cff186f ] When FW provides ICM addresses for drop RX/TX, the provided capability is 64 bits that contain its GVMI as well as the ICM address itself. In case of TX DROP this GVMI is different from the GVMI that the domain is operating on. This patch fixes the action to use these GVMI IDs, as provided by FW. Fixes: 9db810ed2d37 ("net/mlx5: DR, Expose steering action functionality") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index a3e7602b044e..8c265250bbaf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -673,6 +673,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, switch (action_type) { case DR_ACTION_TYP_DROP: attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; break; case DR_ACTION_TYP_FT: dest_action = action; From 75d9ed4930b5a698cc05e528b9672f6ae6f41f43 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Dec 2023 13:20:36 +0200 Subject: [PATCH 096/187] net/mlx5: DR, Can't go to uplink vport on RX rule [ Upstream commit 5b2a2523eeea5f03d39a9d1ff1bad2e9f8eb98d2 ] Go-To-Vport action on RX is not allowed when the vport is uplink. In such case, the packet should be dropped. Fixes: 9db810ed2d37 ("net/mlx5: DR, Expose steering action functionality") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Erez Shitrit Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/steering/dr_action.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 8c265250bbaf..bf7517725d8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -762,11 +762,17 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, action->sampler->tx_icm_addr; break; case DR_ACTION_TYP_VPORT: - attr.hit_gvmi = action->vport->caps->vhca_gvmi; - dest_action = action; - attr.final_icm_addr = rx_rule ? - action->vport->caps->icm_address_rx : - action->vport->caps->icm_address_tx; + if (unlikely(rx_rule && action->vport->caps->num == MLX5_VPORT_UPLINK)) { + /* can't go to uplink on RX rule - dropping instead */ + attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; + } else { + attr.hit_gvmi = action->vport->caps->vhca_gvmi; + dest_action = action; + attr.final_icm_addr = rx_rule ? + action->vport->caps->icm_address_rx : + action->vport->caps->icm_address_tx; + } break; case DR_ACTION_TYP_POP_VLAN: if (!rx_rule && !(dmn->ste_ctx->actions_caps & From 62ce16005ee4bb1a20ed151339d387050936f1fe Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 28 Nov 2023 14:01:54 -0800 Subject: [PATCH 097/187] net/mlx5: Use mlx5 device constant for selecting CQ period mode for ASO [ Upstream commit 20cbf8cbb827094197f3b17db60d71449415db1e ] mlx5 devices have specific constants for choosing the CQ period mode. These constants do not have to match the constants used by the kernel software API for DIM period mode selection. Fixes: cdd04f4d4d71 ("net/mlx5: Add support to create SQ and CQ for ASO") Signed-off-by: Rahul Rameshbabu Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c index c971ff04dd04..c215252f2f53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -98,7 +98,7 @@ static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data) mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); - MLX5_SET(cqc, cqc, cq_period_mode, DIM_CQ_PERIOD_MODE_START_FROM_EQE); + MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - From 890881d10f351bf24bb2fd617c1d2f7b4c9a6065 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Dec 2023 13:52:55 +0200 Subject: [PATCH 098/187] net/mlx5e: Allow software parsing when IPsec crypto is enabled [ Upstream commit 20f5468a7988dedd94a57ba8acd65ebda6a59723 ] All ConnectX devices have software parsing capability enabled, but it is more correct to set allow_swp only if capability exists, which for IPsec means that crypto offload is supported. Fixes: 2451da081a34 ("net/mlx5: Unify device IPsec capabilities check") Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 29dd3a04c154..d3de1b7a80bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -990,8 +990,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); bool allow_swp; - allow_swp = - mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev); + allow_swp = mlx5_geneve_tx_allowed(mdev) || + (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO); mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); From 42876db001bbea7558e8676d1019f08f9390addb Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 17 Jan 2024 15:17:36 +0800 Subject: [PATCH 099/187] net/mlx5e: fix a double-free in arfs_create_groups [ Upstream commit 3c6d5189246f590e4e1f167991558bdb72a4738b ] When `in` allocated by kvzalloc fails, arfs_create_groups will free ft->g and return an error. However, arfs_create_table, the only caller of arfs_create_groups, will hold this error and call to mlx5e_destroy_flow_table, in which the ft->g will be freed again. Fixes: 1cabe6b0965e ("net/mlx5e: Create aRFS flow tables") Signed-off-by: Zhipeng Lu Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_arfs.c | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index dc0a0a27ac84..58eacba6de8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -255,11 +255,13 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in || !ft->g) { - kfree(ft->g); - kvfree(in); + if (!ft->g) return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free_g; } mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); @@ -279,7 +281,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, break; default: err = -EINVAL; - goto out; + goto err_free_in; } switch (type) { @@ -301,7 +303,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, break; default: err = -EINVAL; - goto out; + goto err_free_in; } MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -310,7 +312,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err; + goto err_clean_group; ft->num_groups++; memset(in, 0, inlen); @@ -319,18 +321,20 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err; + goto err_clean_group; ft->num_groups++; kvfree(in); return 0; -err: +err_clean_group: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; -out: +err_free_in: kvfree(in); - +err_free_g: + kfree(ft->g); + ft->g = NULL; return err; } From b2fa86b2aceb4bc9ada51cea90f61546d7512cbe Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Tue, 28 Nov 2023 17:29:01 +0800 Subject: [PATCH 100/187] net/mlx5e: fix a potential double-free in fs_any_create_groups [ Upstream commit aef855df7e1bbd5aa4484851561211500b22707e ] When kcalloc() for ft->g succeeds but kvzalloc() for in fails, fs_any_create_groups() will free ft->g. However, its caller fs_any_create_table() will free ft->g again through calling mlx5e_destroy_flow_table(), which will lead to a double-free. Fix this by setting ft->g to NULL in fs_any_create_groups(). Fixes: 0f575c20bf06 ("net/mlx5e: Introduce Flow Steering ANY API") Signed-off-by: Dinghao Liu Reviewed-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index e1283531e0b8..671adbad0a40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -436,6 +436,7 @@ static int fs_any_create_groups(struct mlx5e_flow_table *ft) in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } From c817f5c01626b59aba271d400ac565fd20d173b0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Dec 2023 00:19:15 +0100 Subject: [PATCH 101/187] rcu: Defer RCU kthreads wakeup when CPU is dying [ Upstream commit e787644caf7628ad3269c1fbd321c3255cf51710 ] When the CPU goes idle for the last time during the CPU down hotplug process, RCU reports a final quiescent state for the current CPU. If this quiescent state propagates up to the top, some tasks may then be woken up to complete the grace period: the main grace period kthread and/or the expedited main workqueue (or kworker). If those kthreads have a SCHED_FIFO policy, the wake up can indirectly arm the RT bandwith timer to the local offline CPU. Since this happens after hrtimers have been migrated at CPUHP_AP_HRTIMERS_DYING stage, the timer gets ignored. Therefore if the RCU kthreads are waiting for RT bandwidth to be available, they may never be actually scheduled. This triggers TREE03 rcutorture hangs: rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 4-...!: (1 GPs behind) idle=9874/1/0x4000000000000000 softirq=0/0 fqs=20 rcuc=21071 jiffies(starved) rcu: (t=21035 jiffies g=938281 q=40787 ncpus=6) rcu: rcu_preempt kthread starved for 20964 jiffies! g938281 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0 rcu: Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior. rcu: RCU grace-period kthread stack dump: task:rcu_preempt state:R running task stack:14896 pid:14 tgid:14 ppid:2 flags:0x00004000 Call Trace: __schedule+0x2eb/0xa80 schedule+0x1f/0x90 schedule_timeout+0x163/0x270 ? __pfx_process_timeout+0x10/0x10 rcu_gp_fqs_loop+0x37c/0x5b0 ? __pfx_rcu_gp_kthread+0x10/0x10 rcu_gp_kthread+0x17c/0x200 kthread+0xde/0x110 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2b/0x40 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 The situation can't be solved with just unpinning the timer. The hrtimer infrastructure and the nohz heuristics involved in finding the best remote target for an unpinned timer would then also need to handle enqueues from an offline CPU in the most horrendous way. So fix this on the RCU side instead and defer the wake up to an online CPU if it's too late for the local one. Reported-by: Paul E. McKenney Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) Signed-off-by: Sasha Levin --- kernel/rcu/tree.c | 34 +++++++++++++++++++++++++++++++++- kernel/rcu/tree_exp.h | 3 +-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 15df37bc052a..9d7464a90f85 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1051,6 +1051,38 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) return needmore; } +static void swake_up_one_online_ipi(void *arg) +{ + struct swait_queue_head *wqh = arg; + + swake_up_one(wqh); +} + +static void swake_up_one_online(struct swait_queue_head *wqh) +{ + int cpu = get_cpu(); + + /* + * If called from rcutree_report_cpu_starting(), wake up + * is dangerous that late in the CPU-down hotplug process. The + * scheduler might queue an ignored hrtimer. Defer the wake up + * to an online CPU instead. + */ + if (unlikely(cpu_is_offline(cpu))) { + int target; + + target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU), + cpu_online_mask); + + smp_call_function_single(target, swake_up_one_online_ipi, + wqh, 0); + put_cpu(); + } else { + put_cpu(); + swake_up_one(wqh); + } +} + /* * Awaken the grace-period kthread. Don't do a self-awaken (unless in an * interrupt or softirq handler, in which case we just might immediately @@ -1075,7 +1107,7 @@ static void rcu_gp_kthread_wake(void) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); - swake_up_one(&rcu_state.gp_wq); + swake_up_one_online(&rcu_state.gp_wq); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index aa3ec3c3b9f7..6d2cbed96b46 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -172,7 +172,6 @@ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) return ret; } - /* * Report the exit from RCU read-side critical section for the last task * that queued itself during or before the current expedited preemptible-RCU @@ -200,7 +199,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (wake) { smp_mb(); /* EGP done before wake_up(). */ - swake_up_one(&rcu_state.expedited_wq); + swake_up_one_online(&rcu_state.expedited_wq); } break; } From bc6e242bb74e2ae616bfd2b250682b738e781c9b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jan 2024 13:11:32 +0100 Subject: [PATCH 102/187] netfilter: nft_limit: reject configurations that cause integer overflow [ Upstream commit c9d9eb9c53d37cdebbad56b91e40baf42d5a97aa ] Reject bogus configs where internal token counter wraps around. This only occurs with very very large requests, such as 17gbyte/s. Its better to reject this rather than having incorrect ratelimit. Fixes: d2168e849ebf ("netfilter: nft_limit: add per-byte limiting") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_limit.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 75c05ef885a9..36ded7d43262 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -58,17 +58,19 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost) static int nft_limit_init(struct nft_limit_priv *priv, const struct nlattr * const tb[], bool pkts) { + u64 unit, tokens, rate_with_burst; bool invert = false; - u64 unit, tokens; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + if (priv->rate == 0) + return -EINVAL; + unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); - priv->nsecs = unit * NSEC_PER_SEC; - if (priv->rate == 0 || priv->nsecs < unit) + if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs)) return -EOVERFLOW; if (tb[NFTA_LIMIT_BURST]) @@ -77,18 +79,25 @@ static int nft_limit_init(struct nft_limit_priv *priv, if (pkts && priv->burst == 0) priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT; - if (priv->rate + priv->burst < priv->rate) + if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst)) return -EOVERFLOW; if (pkts) { - tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst; + u64 tmp = div64_u64(priv->nsecs, priv->rate); + + if (check_mul_overflow(tmp, priv->burst, &tokens)) + return -EOVERFLOW; } else { + u64 tmp; + /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ - tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst), - priv->rate); + if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp)) + return -EOVERFLOW; + + tokens = div64_u64(tmp, priv->rate); } if (tb[NFTA_LIMIT_FLAGS]) { From 2aa515b5b5011a7e7182af7eda89fb3bf5995c65 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 27 Jan 2024 18:15:39 +0000 Subject: [PATCH 103/187] btrfs: fix infinite directory reads commit 9b378f6ad48cfa195ed868db9123c09ee7ec5ea2 upstream. The readdir implementation currently processes always up to the last index it finds. This however can result in an infinite loop if the directory has a large number of entries such that they won't all fit in the given buffer passed to the readdir callback, that is, dir_emit() returns a non-zero value. Because in that case readdir() will be called again and if in the meanwhile new directory entries were added and we still can't put all the remaining entries in the buffer, we keep repeating this over and over. The following C program and test script reproduce the problem: $ cat /mnt/readdir_prog.c #include #include #include int main(int argc, char *argv[]) { DIR *dir = opendir("."); struct dirent *dd; while ((dd = readdir(dir))) { printf("%s\n", dd->d_name); rename(dd->d_name, "TEMPFILE"); rename("TEMPFILE", dd->d_name); } closedir(dir); } $ gcc -o /mnt/readdir_prog /mnt/readdir_prog.c $ cat test.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi mkfs.btrfs -f $DEV &> /dev/null #mkfs.xfs -f $DEV &> /dev/null #mkfs.ext4 -F $DEV &> /dev/null mount $DEV $MNT mkdir $MNT/testdir for ((i = 1; i <= 2000; i++)); do echo -n > $MNT/testdir/file_$i done cd $MNT/testdir /mnt/readdir_prog cd /mnt umount $MNT This behaviour is surprising to applications and it's unlike ext4, xfs, tmpfs, vfat and other filesystems, which always finish. In this case where new entries were added due to renames, some file names may be reported more than once, but this varies according to each filesystem - for example ext4 never reported the same file more than once while xfs reports the first 13 file names twice. So change our readdir implementation to track the last index number when opendir() is called and then make readdir() never process beyond that index number. This gives the same behaviour as ext4. Reported-by: Rob Landley Link: https://lore.kernel.org/linux-btrfs/2c8c55ec-04c6-e0dc-9c5c-8c7924778c35@landley.net/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=217681 CC: stable@vger.kernel.org # 5.15 Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h | 1 + fs/btrfs/delayed-inode.c | 5 +- fs/btrfs/delayed-inode.h | 1 + fs/btrfs/inode.c | 131 +++++++++++++++++++++++---------------- 4 files changed, 84 insertions(+), 54 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 27d06bb5e5c0..cca1acf2e037 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1552,6 +1552,7 @@ struct btrfs_drop_extents_args { struct btrfs_file_private { void *filldir_buf; + u64 last_index; }; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 1331e56e8e84..c6426080cf0a 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1665,6 +1665,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode) } bool btrfs_readdir_get_delayed_items(struct inode *inode, + u64 last_index, struct list_head *ins_list, struct list_head *del_list) { @@ -1684,14 +1685,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, ins_list); item = __btrfs_next_delayed_item(item); } item = __btrfs_first_delayed_deletion_item(delayed_node); - while (item) { + while (item && item->index <= last_index) { refcount_inc(&item->refs); list_add_tail(&item->readdir_list, del_list); item = __btrfs_next_delayed_item(item); diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0163ca637a96..fc6fae9640f8 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info); /* Used for readdir() */ bool btrfs_readdir_get_delayed_items(struct inode *inode, + u64 last_index, struct list_head *ins_list, struct list_head *del_list); void btrfs_readdir_put_delayed_items(struct inode *inode, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9a7d77c410e2..a406f0ac5f74 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5948,6 +5948,74 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } +/* + * Find the highest existing sequence number in a directory and then set the + * in-memory index_cnt variable to the first free sequence number. + */ +static int btrfs_set_inode_index_count(struct btrfs_inode *inode) +{ + struct btrfs_root *root = inode->root; + struct btrfs_key key, found_key; + struct btrfs_path *path; + struct extent_buffer *leaf; + int ret; + + key.objectid = btrfs_ino(inode); + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = (u64)-1; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + /* FIXME: we should be able to handle this */ + if (ret == 0) + goto out; + ret = 0; + + if (path->slots[0] == 0) { + inode->index_cnt = BTRFS_DIR_START_INDEX; + goto out; + } + + path->slots[0]--; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != btrfs_ino(inode) || + found_key.type != BTRFS_DIR_INDEX_KEY) { + inode->index_cnt = BTRFS_DIR_START_INDEX; + goto out; + } + + inode->index_cnt = found_key.offset + 1; +out: + btrfs_free_path(path); + return ret; +} + +static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) +{ + if (dir->index_cnt == (u64)-1) { + int ret; + + ret = btrfs_inode_delayed_dir_index_count(dir); + if (ret) { + ret = btrfs_set_inode_index_count(dir); + if (ret) + return ret; + } + } + + *index = dir->index_cnt; + + return 0; +} + /* * All this infrastructure exists because dir_emit can fault, and we are holding * the tree lock when doing readdir. For now just allocate a buffer and copy @@ -5960,10 +6028,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, static int btrfs_opendir(struct inode *inode, struct file *file) { struct btrfs_file_private *private; + u64 last_index; + int ret; + + ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index); + if (ret) + return ret; private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL); if (!private) return -ENOMEM; + private->last_index = last_index; private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!private->filldir_buf) { kfree(private); @@ -6030,7 +6105,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) INIT_LIST_HEAD(&ins_list); INIT_LIST_HEAD(&del_list); - put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); + put = btrfs_readdir_get_delayed_items(inode, private->last_index, + &ins_list, &del_list); again: key.type = BTRFS_DIR_INDEX_KEY; @@ -6047,6 +6123,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) break; if (found_key.offset < ctx->pos) continue; + if (found_key.offset > private->last_index) + break; if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) continue; di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); @@ -6182,57 +6260,6 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now, return dirty ? btrfs_dirty_inode(inode) : 0; } -/* - * find the highest existing sequence number in a directory - * and then set the in-memory index_cnt variable to reflect - * free sequence numbers - */ -static int btrfs_set_inode_index_count(struct btrfs_inode *inode) -{ - struct btrfs_root *root = inode->root; - struct btrfs_key key, found_key; - struct btrfs_path *path; - struct extent_buffer *leaf; - int ret; - - key.objectid = btrfs_ino(inode); - key.type = BTRFS_DIR_INDEX_KEY; - key.offset = (u64)-1; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; - /* FIXME: we should be able to handle this */ - if (ret == 0) - goto out; - ret = 0; - - if (path->slots[0] == 0) { - inode->index_cnt = BTRFS_DIR_START_INDEX; - goto out; - } - - path->slots[0]--; - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != btrfs_ino(inode) || - found_key.type != BTRFS_DIR_INDEX_KEY) { - inode->index_cnt = BTRFS_DIR_START_INDEX; - goto out; - } - - inode->index_cnt = found_key.offset + 1; -out: - btrfs_free_path(path); - return ret; -} - /* * helper to find a free sequence number in a given directory. This current * code is very simple, later versions will do smarter things in the btree From a045b6b1974005678e29ab8cbbbb2b03e1d05932 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 27 Jan 2024 18:15:40 +0000 Subject: [PATCH 104/187] btrfs: set last dir index to the current last index when opening dir commit 357950361cbc6d54fb68ed878265c647384684ae upstream. When opening a directory for reading it, we set the last index where we stop iteration to the value in struct btrfs_inode::index_cnt. That value does not match the index of the most recently added directory entry but it's instead the index number that will be assigned the next directory entry. This means that if after the call to opendir(3) new directory entries are added, a readdir(3) call will return the first new directory entry. This is fine because POSIX says the following [1]: "If a file is removed from or added to the directory after the most recent call to opendir() or rewinddir(), whether a subsequent call to readdir() returns an entry for that file is unspecified." For example for the test script from commit 9b378f6ad48c ("btrfs: fix infinite directory reads"), where we have 2000 files in a directory, ext4 doesn't return any new directory entry after opendir(3), while xfs returns the first 13 new directory entries added after the opendir(3) call. If we move to a shorter example with an empty directory when opendir(3) is called, and 2 files added to the directory after the opendir(3) call, then readdir(3) on btrfs will return the first file, ext4 and xfs return the 2 files (but in a different order). A test program for this, reported by Ian Johnson, is the following: #include #include int main(void) { DIR *dir = opendir("test"); FILE *file; file = fopen("test/1", "w"); fwrite("1", 1, 1, file); fclose(file); file = fopen("test/2", "w"); fwrite("2", 1, 1, file); fclose(file); struct dirent *entry; while ((entry = readdir(dir))) { printf("%s\n", entry->d_name); } closedir(dir); return 0; } To make this less odd, change the behaviour to never return new entries that were added after the opendir(3) call. This is done by setting the last_index field of the struct btrfs_file_private attached to the directory's file handle with a value matching btrfs_inode::index_cnt minus 1, since that value always matches the index of the next new directory entry and not the index of the most recently added entry. [1] https://pubs.opengroup.org/onlinepubs/007904875/functions/readdir_r.html Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ CC: stable@vger.kernel.org # 6.5+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a406f0ac5f74..6f388fee0eee 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6011,7 +6011,8 @@ static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) } } - *index = dir->index_cnt; + /* index_cnt is the index number of next new entry, so decrement it. */ + *index = dir->index_cnt - 1; return 0; } From fd968e683bd966aadc789f8207c7731eeece924c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 27 Jan 2024 18:15:41 +0000 Subject: [PATCH 105/187] btrfs: refresh dir last index during a rewinddir(3) call commit e60aa5da14d01fed8411202dbe4adf6c44bd2a57 upstream. When opening a directory we find what's the index of its last entry and then store it in the directory's file handle private data (struct btrfs_file_private::last_index), so that in the case new directory entries are added to a directory after an opendir(3) call we don't end up in an infinite loop (see commit 9b378f6ad48c ("btrfs: fix infinite directory reads")) when calling readdir(3). However once rewinddir(3) is called, POSIX states [1] that any new directory entries added after the previous opendir(3) call, must be returned by subsequent calls to readdir(3): "The rewinddir() function shall reset the position of the directory stream to which dirp refers to the beginning of the directory. It shall also cause the directory stream to refer to the current state of the corresponding directory, as a call to opendir() would have done." We currently don't refresh the last_index field of the struct btrfs_file_private associated to the directory, so after a rewinddir(3) we are not returning any new entries added after the opendir(3) call. Fix this by finding the current last index of the directory when llseek is called against the directory. This can be reproduced by the following C program provided by Ian Johnson: #include #include int main(void) { DIR *dir = opendir("test"); FILE *file; file = fopen("test/1", "w"); fwrite("1", 1, 1, file); fclose(file); file = fopen("test/2", "w"); fwrite("2", 1, 1, file); fclose(file); rewinddir(dir); struct dirent *entry; while ((entry = readdir(dir))) { printf("%s\n", entry->d_name); } closedir(dir); return 0; } Reported-by: Ian Johnson Link: https://lore.kernel.org/linux-btrfs/YR1P0S.NGASEG570GJ8@ianjohnson.dev/ Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") CC: stable@vger.kernel.org # 6.5+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6f388fee0eee..aeae04b5cef4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6049,6 +6049,19 @@ static int btrfs_opendir(struct inode *inode, struct file *file) return 0; } +static loff_t btrfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct btrfs_file_private *private = file->private_data; + int ret; + + ret = btrfs_get_dir_last_index(BTRFS_I(file_inode(file)), + &private->last_index); + if (ret) + return ret; + + return generic_file_llseek(file, offset, whence); +} + struct dir_entry { u64 ino; u64 offset; @@ -11429,7 +11442,7 @@ static const struct inode_operations btrfs_dir_inode_operations = { }; static const struct file_operations btrfs_dir_file_operations = { - .llseek = generic_file_llseek, + .llseek = btrfs_dir_llseek, .read = generic_read_dir, .iterate_shared = btrfs_real_readdir, .open = btrfs_opendir, From c25d7922ef0f7b529dd67d74b38fbead33ad1e53 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 27 Jan 2024 18:15:42 +0000 Subject: [PATCH 106/187] btrfs: fix race between reading a directory and adding entries to it commit 8e7f82deb0c0386a03b62e30082574347f8b57d5 upstream. When opening a directory (opendir(3)) or rewinding it (rewinddir(3)), we are not holding the directory's inode locked, and this can result in later attempting to add two entries to the directory with the same index number, resulting in a transaction abort, with -EEXIST (-17), when inserting the second delayed dir index. This results in a trace like the following: Sep 11 22:34:59 myhostname kernel: BTRFS error (device dm-3): err add delayed dir index item(name: cockroach-stderr.log) into the insertion tree of the delayed node(root id: 5, inode id: 4539217, errno: -17) Sep 11 22:34:59 myhostname kernel: ------------[ cut here ]------------ Sep 11 22:34:59 myhostname kernel: kernel BUG at fs/btrfs/delayed-inode.c:1504! Sep 11 22:34:59 myhostname kernel: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI Sep 11 22:34:59 myhostname kernel: CPU: 0 PID: 7159 Comm: cockroach Not tainted 6.4.15-200.fc38.x86_64 #1 Sep 11 22:34:59 myhostname kernel: Hardware name: ASUS ESC500 G3/P9D WS, BIOS 2402 06/27/2018 Sep 11 22:34:59 myhostname kernel: RIP: 0010:btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: Code: eb dd 48 (...) Sep 11 22:34:59 myhostname kernel: RSP: 0000:ffffa9980e0fbb28 EFLAGS: 00010282 Sep 11 22:34:59 myhostname kernel: RAX: 0000000000000000 RBX: ffff8b10b8f4a3c0 RCX: 0000000000000000 Sep 11 22:34:59 myhostname kernel: RDX: 0000000000000000 RSI: ffff8b177ec21540 RDI: ffff8b177ec21540 Sep 11 22:34:59 myhostname kernel: RBP: ffff8b110cf80888 R08: 0000000000000000 R09: ffffa9980e0fb938 Sep 11 22:34:59 myhostname kernel: R10: 0000000000000003 R11: ffffffff86146508 R12: 0000000000000014 Sep 11 22:34:59 myhostname kernel: R13: ffff8b1131ae5b40 R14: ffff8b10b8f4a418 R15: 00000000ffffffef Sep 11 22:34:59 myhostname kernel: FS: 00007fb14a7fe6c0(0000) GS:ffff8b177ec00000(0000) knlGS:0000000000000000 Sep 11 22:34:59 myhostname kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Sep 11 22:34:59 myhostname kernel: CR2: 000000c00143d000 CR3: 00000001b3b4e002 CR4: 00000000001706f0 Sep 11 22:34:59 myhostname kernel: Call Trace: Sep 11 22:34:59 myhostname kernel: Sep 11 22:34:59 myhostname kernel: ? die+0x36/0x90 Sep 11 22:34:59 myhostname kernel: ? do_trap+0xda/0x100 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? do_error_trap+0x6a/0x90 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? exc_invalid_op+0x50/0x70 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? asm_exc_invalid_op+0x1a/0x20 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: ? btrfs_insert_delayed_dir_index+0x1da/0x260 Sep 11 22:34:59 myhostname kernel: btrfs_insert_dir_item+0x200/0x280 Sep 11 22:34:59 myhostname kernel: btrfs_add_link+0xab/0x4f0 Sep 11 22:34:59 myhostname kernel: ? ktime_get_real_ts64+0x47/0xe0 Sep 11 22:34:59 myhostname kernel: btrfs_create_new_inode+0x7cd/0xa80 Sep 11 22:34:59 myhostname kernel: btrfs_symlink+0x190/0x4d0 Sep 11 22:34:59 myhostname kernel: ? schedule+0x5e/0xd0 Sep 11 22:34:59 myhostname kernel: ? __d_lookup+0x7e/0xc0 Sep 11 22:34:59 myhostname kernel: vfs_symlink+0x148/0x1e0 Sep 11 22:34:59 myhostname kernel: do_symlinkat+0x130/0x140 Sep 11 22:34:59 myhostname kernel: __x64_sys_symlinkat+0x3d/0x50 Sep 11 22:34:59 myhostname kernel: do_syscall_64+0x5d/0x90 Sep 11 22:34:59 myhostname kernel: ? syscall_exit_to_user_mode+0x2b/0x40 Sep 11 22:34:59 myhostname kernel: ? do_syscall_64+0x6c/0x90 Sep 11 22:34:59 myhostname kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc The race leading to the problem happens like this: 1) Directory inode X is loaded into memory, its ->index_cnt field is initialized to (u64)-1 (at btrfs_alloc_inode()); 2) Task A is adding a new file to directory X, holding its vfs inode lock, and calls btrfs_set_inode_index() to get an index number for the entry. Because the inode's index_cnt field is set to (u64)-1 it calls btrfs_inode_delayed_dir_index_count() which fails because no dir index entries were added yet to the delayed inode and then it calls btrfs_set_inode_index_count(). This functions finds the last dir index key and then sets index_cnt to that index value + 1. It found that the last index key has an offset of 100. However before it assigns a value of 101 to index_cnt... 3) Task B calls opendir(3), ending up at btrfs_opendir(), where the VFS lock for inode X is not taken, so it calls btrfs_get_dir_last_index() and sees index_cnt still with a value of (u64)-1. Because of that it calls btrfs_inode_delayed_dir_index_count() which fails since no dir index entries were added to the delayed inode yet, and then it also calls btrfs_set_inode_index_count(). This also finds that the last index key has an offset of 100, and before it assigns the value 101 to the index_cnt field of inode X... 4) Task A assigns a value of 101 to index_cnt. And then the code flow goes to btrfs_set_inode_index() where it increments index_cnt from 101 to 102. Task A then creates a delayed dir index entry with a sequence number of 101 and adds it to the delayed inode; 5) Task B assigns 101 to the index_cnt field of inode X; 6) At some later point when someone tries to add a new entry to the directory, btrfs_set_inode_index() will return 101 again and shortly after an attempt to add another delayed dir index key with index number 101 will fail with -EEXIST resulting in a transaction abort. Fix this by locking the inode at btrfs_get_dir_last_index(), which is only only used when opening a directory or attempting to lseek on it. Reported-by: ken Link: https://lore.kernel.org/linux-btrfs/CAE6xmH+Lp=Q=E61bU+v9eWX8gYfLvu6jLYxjxjFpo3zHVPR0EQ@mail.gmail.com/ Reported-by: syzbot+d13490c82ad5353c779d@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/ Fixes: 9b378f6ad48c ("btrfs: fix infinite directory reads") CC: stable@vger.kernel.org # 6.5+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index aeae04b5cef4..38f253a2d170 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6000,21 +6000,24 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode) static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index) { - if (dir->index_cnt == (u64)-1) { - int ret; + int ret = 0; + btrfs_inode_lock(&dir->vfs_inode, 0); + if (dir->index_cnt == (u64)-1) { ret = btrfs_inode_delayed_dir_index_count(dir); if (ret) { ret = btrfs_set_inode_index_count(dir); if (ret) - return ret; + goto out; } } /* index_cnt is the index number of next new entry, so decrement it. */ *index = dir->index_cnt - 1; +out: + btrfs_inode_unlock(&dir->vfs_inode, 0); - return 0; + return ret; } /* From ed5b62bbd4519f06bb237ee715f1d4dbea9cabbc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jan 2024 13:34:32 +0100 Subject: [PATCH 107/187] netfilter: nf_tables: restrict anonymous set and map names to 16 bytes [ Upstream commit b462579b2b86a8f5230543cadd3a4836be27baf7 ] nftables has two types of sets/maps, one where userspace defines the name, and anonymous sets/maps, where userspace defines a template name. For the latter, kernel requires presence of exactly one "%d". nftables uses "__set%d" and "__map%d" for this. The kernel will expand the format specifier and replaces it with the smallest unused number. As-is, userspace could define a template name that allows to move the set name past the 256 bytes upperlimit (post-expansion). I don't see how this could be a problem, but I would prefer if userspace cannot do this, so add a limit of 16 bytes for the '%d' template name. 16 bytes is the old total upper limit for set names that existed when nf_tables was merged initially. Fixes: 387454901bd6 ("netfilter: nf_tables: Allow set names of up to 255 chars") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2702294ac46c..75b21cd1b2c9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -24,6 +24,7 @@ #include #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +#define NFT_SET_MAX_ANONLEN 16 unsigned int nf_tables_net_id __read_mostly; @@ -4127,6 +4128,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; + if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN) + return -EINVAL; + inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (inuse == NULL) return -ENOMEM; From 67ee37360d41df6aaec939bfd7219a54858a5e4e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 23 Jan 2024 16:38:25 +0100 Subject: [PATCH 108/187] netfilter: nf_tables: validate NFPROTO_* family [ Upstream commit d0009effa8862c20a13af4cb7475d9771b905693 ] Several expressions explicitly refer to NF_INET_* hook definitions from expr->ops->validate, however, family is not validated. Bail out with EOPNOTSUPP in case they are used from unsupported families. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Fixes: 2fa841938c64 ("netfilter: nf_tables: introduce routing expression") Fixes: 554ced0a6e29 ("netfilter: nf_tables: add support for native socket matching") Fixes: ad49d86e07a4 ("netfilter: nf_tables: Add synproxy support") Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") Fixes: 6c47260250fc ("netfilter: nf_tables: add xfrm expression") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_compat.c | 12 ++++++++++++ net/netfilter/nft_flow_offload.c | 5 +++++ net/netfilter/nft_nat.c | 5 +++++ net/netfilter/nft_rt.c | 5 +++++ net/netfilter/nft_socket.c | 5 +++++ net/netfilter/nft_synproxy.c | 7 +++++-- net/netfilter/nft_tproxy.c | 5 +++++ net/netfilter/nft_xfrm.c | 5 +++++ 8 files changed, 47 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index c16172427622..6952da7dfc02 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -349,6 +349,12 @@ static int nft_target_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -592,6 +598,12 @@ static int nft_match_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 8a43f6f9c90b..3d9f6dda5aeb 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -380,6 +380,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx, { unsigned int hook_mask = (1 << NF_INET_FORWARD); + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, hook_mask); } diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 353c090f8891..ba7bcce724ef 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -142,6 +142,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx, struct nft_nat *priv = nft_expr_priv(expr); int err; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 71931ec91721..7d21e16499bf 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -166,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp const struct nft_rt *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->key) { case NFT_RT_NEXTHOP4: case NFT_RT_NEXTHOP6: diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 777561b71fcb..f28324fd8d71 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -242,6 +242,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 6cf9a04fbfe2..a450f28a5ef6 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx, break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: err = nf_synproxy_ipv4_init(snet, ctx->net); if (err) goto nf_ct_failure; @@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx) break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: nf_synproxy_ipv4_fini(snet, ctx->net); nf_synproxy_ipv6_fini(snet, ctx->net); break; @@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD)); } diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 62da25ad264b..adb50c39572e 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -316,6 +316,11 @@ static int nft_tproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); } diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 1c5343c936a8..30259846c352 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -235,6 +235,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e const struct nft_xfrm *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->dir) { case XFRM_POLICY_IN: hooks = (1 << NF_INET_FORWARD) | From acb6eaf2eac94490940c317fe68d9b8cb99296dc Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Mon, 22 Jan 2024 19:19:09 +0100 Subject: [PATCH 109/187] net: stmmac: Wait a bit for the reset to take effect [ Upstream commit a5f5eee282a0aae80227697e1d9c811b1726d31d ] otherwise the synopsys_id value may be read out wrong, because the GMAC_VERSION register might still be in reset state, for at least 1 us after the reset is de-asserted. Add a wait for 10 us before continuing to be on the safe side. > From what have you got that delay value? Just try and error, with very old linux versions and old gcc versions the synopsys_id was read out correctly most of the time (but not always), with recent linux versions and recnet gcc versions it was read out wrongly most of the time, but again not always. I don't have access to the VHDL code in question, so I cannot tell why it takes so long to get the correct values, I also do not have more than a few hardware samples, so I cannot tell how long this timeout must be in worst case. Experimentally I can tell that the register is read several times as zero immediately after the reset is de-asserted, also adding several no-ops is not enough, adding a printk is enough, also udelay(1) seems to be enough but I tried that not very often, and I have not access to many hardware samples to be 100% sure about the necessary delay. And since the udelay here is only executed once per device instance, it seems acceptable to delay the boot for 10 us. BTW: my hardware's synopsys id is 0x37. Fixes: c5e4ddbdfa11 ("net: stmmac: Add support for optional reset control") Signed-off-by: Bernd Edlinger Reviewed-by: Jiri Pirko Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/AS8P193MB1285A810BD78C111E7F6AA34E4752@AS8P193MB1285.EURP193.PROD.OUTLOOK.COM Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8f8de14347a9..e988a60c8561 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7166,6 +7166,9 @@ int stmmac_dvr_probe(struct device *device, dev_err(priv->device, "unable to bring out of ahb reset: %pe\n", ERR_PTR(ret)); + /* Wait a bit for the reset to take effect */ + udelay(10); + /* Init MAC and get the capabilities */ ret = stmmac_hw_init(priv); if (ret) From cec65f09c47d8c2d67f2bcad6cf05c490628d1ec Mon Sep 17 00:00:00 2001 From: Jenishkumar Maheshbhai Patel Date: Thu, 18 Jan 2024 19:59:14 -0800 Subject: [PATCH 110/187] net: mvpp2: clear BM pool before initialization [ Upstream commit 9f538b415db862e74b8c5d3abbccfc1b2b6caa38 ] Register value persist after booting the kernel using kexec which results in kernel panic. Thus clear the BM pool registers before initialisation to fix the issue. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Jenishkumar Maheshbhai Patel Reviewed-by: Maxime Chevallier Link: https://lore.kernel.org/r/20240119035914.2595665-1-jpatel2@marvell.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index f936640cca4e..2f80ee84c7ec 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -614,12 +614,38 @@ static void mvpp23_bm_set_8pool_mode(struct mvpp2 *priv) mvpp2_write(priv, MVPP22_BM_POOL_BASE_ADDR_HIGH_REG, val); } +/* Cleanup pool before actual initialization in the OS */ +static void mvpp2_bm_pool_cleanup(struct mvpp2 *priv, int pool_id) +{ + unsigned int thread = mvpp2_cpu_to_thread(priv, get_cpu()); + u32 val; + int i; + + /* Drain the BM from all possible residues left by firmware */ + for (i = 0; i < MVPP2_BM_POOL_SIZE_MAX; i++) + mvpp2_thread_read(priv, thread, MVPP2_BM_PHY_ALLOC_REG(pool_id)); + + put_cpu(); + + /* Stop the BM pool */ + val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(pool_id)); + val |= MVPP2_BM_STOP_MASK; + mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(pool_id), val); +} + static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) { enum dma_data_direction dma_dir = DMA_FROM_DEVICE; int i, err, poolnum = MVPP2_BM_POOLS_NUM; struct mvpp2_port *port; + if (priv->percpu_pools) + poolnum = mvpp2_get_nrxqs(priv) * 2; + + /* Clean up the pool state in case it contains stale state */ + for (i = 0; i < poolnum; i++) + mvpp2_bm_pool_cleanup(priv, i); + if (priv->percpu_pools) { for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; @@ -629,7 +655,6 @@ static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) } } - poolnum = mvpp2_get_nrxqs(priv) * 2; for (i = 0; i < poolnum; i++) { /* the pool in use */ int pn = i / (poolnum / 2); From 4b4dcb3f42dd3339de31d7950d375acf121cb2c0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 22 Jan 2024 22:05:29 -0800 Subject: [PATCH 111/187] selftests: netdevsim: fix the udp_tunnel_nic test [ Upstream commit 0879020a7817e7ce636372c016b4528f541c9f4d ] This test is missing a whole bunch of checks for interface renaming and one ifup. Presumably it was only used on a system with renaming disabled and NetworkManager running. Fixes: 91f430b2c49d ("selftests: net: add a test for UDP tunnel info infra") Acked-by: Paolo Abeni Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123060529.1033912-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../selftests/drivers/net/netdevsim/udp_tunnel_nic.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 1b08e042cf94..185b02d2d4cd 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -269,6 +269,7 @@ for port in 0 1; do echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` + ifconfig $NSIM_NETDEV up msg="new NIC device created" exp0=( 0 0 0 0 ) @@ -430,6 +431,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "overflow NIC table" @@ -487,6 +489,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "overflow NIC table" @@ -543,6 +546,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "destroy NIC" @@ -572,6 +576,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -632,6 +637,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error @@ -687,6 +693,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -746,6 +753,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -876,6 +884,7 @@ msg="re-add a port" echo 2 > $NSIM_DEV_SYS/del_port echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` check_tables msg="replace VxLAN in overflow table" From 5b1086d226390b3cbdbdd15b7b036b67f6ff0208 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Tue, 23 Jan 2024 01:24:42 +0800 Subject: [PATCH 112/187] fjes: fix memleaks in fjes_hw_setup [ Upstream commit f6cc4b6a3ae53df425771000e9c9540cce9b7bb1 ] In fjes_hw_setup, it allocates several memory and delay the deallocation to the fjes_hw_exit in fjes_probe through the following call chain: fjes_probe |-> fjes_hw_init |-> fjes_hw_setup |-> fjes_hw_exit However, when fjes_hw_setup fails, fjes_hw_exit won't be called and thus all the resources allocated in fjes_hw_setup will be leaked. In this patch, we free those resources in fjes_hw_setup and prevents such leaks. Fixes: 2fcbca687702 ("fjes: platform_driver's .probe and .remove routine") Signed-off-by: Zhipeng Lu Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240122172445.3841883-1-alexious@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/fjes/fjes_hw.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index 704e949484d0..b9b5554ea862 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -221,21 +221,25 @@ static int fjes_hw_setup(struct fjes_hw *hw) mem_size = FJES_DEV_REQ_BUF_SIZE(hw->max_epid); hw->hw_info.req_buf = kzalloc(mem_size, GFP_KERNEL); - if (!(hw->hw_info.req_buf)) - return -ENOMEM; + if (!(hw->hw_info.req_buf)) { + result = -ENOMEM; + goto free_ep_info; + } hw->hw_info.req_buf_size = mem_size; mem_size = FJES_DEV_RES_BUF_SIZE(hw->max_epid); hw->hw_info.res_buf = kzalloc(mem_size, GFP_KERNEL); - if (!(hw->hw_info.res_buf)) - return -ENOMEM; + if (!(hw->hw_info.res_buf)) { + result = -ENOMEM; + goto free_req_buf; + } hw->hw_info.res_buf_size = mem_size; result = fjes_hw_alloc_shared_status_region(hw); if (result) - return result; + goto free_res_buf; hw->hw_info.buffer_share_bit = 0; hw->hw_info.buffer_unshare_reserve_bit = 0; @@ -246,11 +250,11 @@ static int fjes_hw_setup(struct fjes_hw *hw) result = fjes_hw_alloc_epbuf(&buf_pair->tx); if (result) - return result; + goto free_epbuf; result = fjes_hw_alloc_epbuf(&buf_pair->rx); if (result) - return result; + goto free_epbuf; spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&buf_pair->tx, mac, @@ -273,6 +277,25 @@ static int fjes_hw_setup(struct fjes_hw *hw) fjes_hw_init_command_registers(hw, ¶m); return 0; + +free_epbuf: + for (epidx = 0; epidx < hw->max_epid ; epidx++) { + if (epidx == hw->my_epid) + continue; + fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].tx); + fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].rx); + } + fjes_hw_free_shared_status_region(hw); +free_res_buf: + kfree(hw->hw_info.res_buf); + hw->hw_info.res_buf = NULL; +free_req_buf: + kfree(hw->hw_info.req_buf); + hw->hw_info.req_buf = NULL; +free_ep_info: + kfree(hw->ep_shm_info); + hw->ep_shm_info = NULL; + return result; } static void fjes_hw_cleanup(struct fjes_hw *hw) From 0a5a083c2b6af6bcc9c726c54dd0bfdb9a12e161 Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Tue, 23 Jan 2024 10:51:41 -0600 Subject: [PATCH 113/187] net: fec: fix the unhandled context fault from smmu [ Upstream commit 5e344807735023cd3a67c37a1852b849caa42620 ] When repeatedly changing the interface link speed using the command below: ethtool -s eth0 speed 100 duplex full ethtool -s eth0 speed 1000 duplex full The following errors may sometimes be reported by the ARM SMMU driver: [ 5395.035364] fec 5b040000.ethernet eth0: Link is Down [ 5395.039255] arm-smmu 51400000.iommu: Unhandled context fault: fsr=0x402, iova=0x00000000, fsynr=0x100001, cbfrsynra=0x852, cb=2 [ 5398.108460] fec 5b040000.ethernet eth0: Link is Up - 100Mbps/Full - flow control off It is identified that the FEC driver does not properly stop the TX queue during the link speed transitions, and this results in the invalid virtual I/O address translations from the SMMU and causes the context faults. Fixes: dbc64a8ea231 ("net: fec: move calls to quiesce/resume packet processing out of fec_restart()") Signed-off-by: Shenwei Wang Link: https://lore.kernel.org/r/20240123165141.2008104-1-shenwei.wang@nxp.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 6d1b76002282..97d12c7eea77 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1917,6 +1917,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) /* if any of the above changed restart the FEC */ if (status_change) { + netif_stop_queue(ndev); napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); @@ -1926,6 +1927,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) } } else { if (fep->link) { + netif_stop_queue(ndev); napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_stop(ndev); From d9c54763e5cdbbd3f81868597fe8aca3c96e6387 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Jan 2024 20:59:08 -0700 Subject: [PATCH 114/187] nbd: always initialize struct msghdr completely commit 78fbb92af27d0982634116c7a31065f24d092826 upstream. syzbot complains that msg->msg_get_inq value can be uninitialized [1] struct msghdr got many new fields recently, we should always make sure their values is zero by default. [1] BUG: KMSAN: uninit-value in tcp_recvmsg+0x686/0xac0 net/ipv4/tcp.c:2571 tcp_recvmsg+0x686/0xac0 net/ipv4/tcp.c:2571 inet_recvmsg+0x131/0x580 net/ipv4/af_inet.c:879 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg+0x12b/0x1e0 net/socket.c:1066 __sock_xmit+0x236/0x5c0 drivers/block/nbd.c:538 nbd_read_reply drivers/block/nbd.c:732 [inline] recv_work+0x262/0x3100 drivers/block/nbd.c:863 process_one_work kernel/workqueue.c:2627 [inline] process_scheduled_works+0x104e/0x1e70 kernel/workqueue.c:2700 worker_thread+0xf45/0x1490 kernel/workqueue.c:2781 kthread+0x3ed/0x540 kernel/kthread.c:388 ret_from_fork+0x66/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 Local variable msg created at: __sock_xmit+0x4c/0x5c0 drivers/block/nbd.c:513 nbd_read_reply drivers/block/nbd.c:732 [inline] recv_work+0x262/0x3100 drivers/block/nbd.c:863 CPU: 1 PID: 7465 Comm: kworker/u5:1 Not tainted 6.7.0-rc7-syzkaller-00041-gf016f7547aee #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Workqueue: nbd5-recv recv_work Fixes: f94fd25cb0aa ("tcp: pass back data left in socket after receive") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: stable@vger.kernel.org Cc: Josef Bacik Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: nbd@other.debian.org Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240112132657.647112-1-edumazet@google.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/nbd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 8037aaefeb2e..9a53165de4ce 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -494,7 +494,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, struct iov_iter *iter, int msg_flags, int *sent) { int result; - struct msghdr msg; + struct msghdr msg = { }; unsigned int noreclaim_flag; if (unlikely(!sock)) { @@ -509,10 +509,6 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, noreclaim_flag = memalloc_noreclaim_save(); do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (send) From 9ebd514fbdebe25f851cc39f4476c75e923d8de1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 Jan 2024 11:48:47 -0800 Subject: [PATCH 115/187] btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted commit 3324d0547861b16cf436d54abba7052e0c8aa9de upstream. Sweet Tea spotted a race between subvolume deletion and snapshotting that can result in the root item for the snapshot having the BTRFS_ROOT_SUBVOL_DEAD flag set. The race is: Thread 1 | Thread 2 ----------------------------------------------|---------- btrfs_delete_subvolume | btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)| |btrfs_mksubvol | down_read(subvol_sem) | create_snapshot | ... | create_pending_snapshot | copy root item from source down_write(subvol_sem) | This flag is only checked in send and swap activate, which this would cause to fail mysteriously. create_snapshot() now checks the root refs to reject a deleted subvolume, so we can fix this by locking subvol_sem earlier so that the BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically. CC: stable@vger.kernel.org # 4.14+ Reported-by: Sweet Tea Dorminy Reviewed-by: Sweet Tea Dorminy Reviewed-by: Anand Jain Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 38f253a2d170..82f92b5652a7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4689,6 +4689,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) u64 root_flags; int ret; + down_write(&fs_info->subvol_sem); + /* * Don't allow to delete a subvolume with send in progress. This is * inside the inode lock so the error handling that has to drop the bit @@ -4700,25 +4702,25 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) btrfs_warn(fs_info, "attempt to delete subvolume %llu during send", dest->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } if (atomic_read(&dest->nr_swapfiles)) { spin_unlock(&dest->root_item_lock); btrfs_warn(fs_info, "attempt to delete subvolume %llu with active swapfile", root->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags | BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - down_write(&fs_info->subvol_sem); - ret = may_destroy_subvol(dest); if (ret) - goto out_up_write; + goto out_undead; btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); /* @@ -4728,7 +4730,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); if (ret) - goto out_up_write; + goto out_undead; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { @@ -4794,15 +4796,17 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) inode->i_flags |= S_DEAD; out_release: btrfs_subvolume_release_metadata(root, &block_rsv); -out_up_write: - up_write(&fs_info->subvol_sem); +out_undead: if (ret) { spin_lock(&dest->root_item_lock); root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - } else { + } +out_up_write: + up_write(&fs_info->subvol_sem); + if (!ret) { d_invalidate(dentry); btrfs_prune_dentries(dest); ASSERT(dest->send_in_progress == 0); From c91c247be4de2d5a4d0efe636712bfc93c27b7e9 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 3 Jan 2024 13:31:27 +0300 Subject: [PATCH 116/187] btrfs: ref-verify: free ref cache before clearing mount opt commit f03e274a8b29d1d1c1bbd7f764766cb5ca537ab7 upstream. As clearing REF_VERIFY mount option indicates there were some errors in a ref-verify process, a ref cache is not relevant anymore and should be freed. btrfs_free_ref_cache() requires REF_VERIFY option being set so call it just before clearing the mount option. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Reported-by: syzbot+be14ed7728594dc8bd42@syzkaller.appspotmail.com Fixes: fd708b81d972 ("Btrfs: add a extent ref verify tool") CC: stable@vger.kernel.org # 5.4+ Closes: https://lore.kernel.org/lkml/000000000000e5a65c05ee832054@google.com/ Reported-by: syzbot+c563a3c79927971f950f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000007fe09705fdc6086c@google.com/ Reviewed-by: Anand Jain Signed-off-by: Fedor Pchelkin Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ref-verify.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 4b052d4009d3..8083fe866d2b 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -883,8 +883,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, out_unlock: spin_unlock(&fs_info->ref_verify_lock); out: - if (ret) + if (ret) { + btrfs_free_ref_cache(fs_info); btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); + } return ret; } @@ -1015,8 +1017,8 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) } } if (ret) { - btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); btrfs_free_ref_cache(fs_info); + btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); } btrfs_free_path(path); return ret; From b60f748a2fb637eec431f577b30b33540e733224 Mon Sep 17 00:00:00 2001 From: Chung-Chiang Cheng Date: Fri, 12 Jan 2024 15:41:05 +0800 Subject: [PATCH 117/187] btrfs: tree-checker: fix inline ref size in error messages commit f398e70dd69e6ceea71463a5380e6118f219197e upstream. The error message should accurately reflect the size rather than the type. Fixes: f82d1c7ca8ae ("btrfs: tree-checker: Add EXTENT_ITEM and METADATA_ITEM check") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Reviewed-by: Qu Wenruo Signed-off-by: Chung-Chiang Cheng Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 2b39c7f9226f..02e8398246ae 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1399,7 +1399,7 @@ static int check_extent_item(struct extent_buffer *leaf, if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { extent_err(leaf, slot, "inline ref item overflows extent item, ptr %lu iref size %u end %lu", - ptr, inline_type, end); + ptr, btrfs_extent_inline_ref_size(inline_type), end); return -EUCLEAN; } From 86aff7c5f7c6722041545825162dcf1b30bbeff2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 15 Jan 2024 20:30:26 +0100 Subject: [PATCH 118/187] btrfs: don't warn if discard range is not aligned to sector commit a208b3f132b48e1f94f620024e66fea635925877 upstream. There's a warning in btrfs_issue_discard() when the range is not aligned to 512 bytes, originally added in 4d89d377bbb0 ("btrfs: btrfs_issue_discard ensure offset/length are aligned to sector boundaries"). We can't do sub-sector writes anyway so the adjustment is the only thing that we can do and the warning is unnecessary. CC: stable@vger.kernel.org # 4.19+ Reported-by: syzbot+4a4f1eba14eb5c3417d1@syzkaller.appspotmail.com Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2a7c9088fe1f..7be3fcfb3f97 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1209,7 +1209,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, u64 bytes_left, end; u64 aligned_start = ALIGN(start, 1 << 9); - if (WARN_ON(start != aligned_start)) { + /* Adjust the range to be aligned to 512B sectors if necessary. */ + if (start != aligned_start) { len -= aligned_start - start; len = round_down(len, 1 << 9); start = aligned_start; From 52e02f26d01a61d598bb5b9ce3d336562b4e0b5a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 10 Jan 2024 08:58:26 +1030 Subject: [PATCH 119/187] btrfs: defrag: reject unknown flags of btrfs_ioctl_defrag_range_args commit 173431b274a9a54fc10b273b46e67f46bcf62d2e upstream. Add extra sanity check for btrfs_ioctl_defrag_range_args::flags. This is not really to enhance fuzzing tests, but as a preparation for future expansion on btrfs_ioctl_defrag_range_args. In the future we're going to add new members, allowing more fine tuning for btrfs defrag. Without the -ENONOTSUPP error, there would be no way to detect if the kernel supports those new defrag features. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 4 ++++ include/uapi/linux/btrfs.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e8e4781c48a5..8c656cb4295a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3500,6 +3500,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EFAULT; goto out; } + if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) { + ret = -EOPNOTSUPP; + goto out; + } /* compression requires us to start the IO */ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { range.flags |= BTRFS_DEFRAG_RANGE_START_IO; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index d4d4fa0bb362..24b54635bae9 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -585,6 +585,9 @@ struct btrfs_ioctl_clone_range_args { */ #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 +#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \ + BTRFS_DEFRAG_RANGE_START_IO) + struct btrfs_ioctl_defrag_range_args { /* start of the defrag operation */ __u64 start; From 6e6bca99e8d88d989a7cde4c064abea552d5219b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 Jan 2024 11:48:46 -0800 Subject: [PATCH 120/187] btrfs: don't abort filesystem when attempting to snapshot deleted subvolume commit 7081929ab2572920e94d70be3d332e5c9f97095a upstream. If the source file descriptor to the snapshot ioctl refers to a deleted subvolume, we get the following abort: BTRFS: Transaction aborted (error -2) WARNING: CPU: 0 PID: 833 at fs/btrfs/transaction.c:1875 create_pending_snapshot+0x1040/0x1190 [btrfs] Modules linked in: pata_acpi btrfs ata_piix libata scsi_mod virtio_net blake2b_generic xor net_failover virtio_rng failover scsi_common rng_core raid6_pq libcrc32c CPU: 0 PID: 833 Comm: t_snapshot_dele Not tainted 6.7.0-rc6 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014 RIP: 0010:create_pending_snapshot+0x1040/0x1190 [btrfs] RSP: 0018:ffffa09c01337af8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff9982053e7c78 RCX: 0000000000000027 RDX: ffff99827dc20848 RSI: 0000000000000001 RDI: ffff99827dc20840 RBP: ffffa09c01337c00 R08: 0000000000000000 R09: ffffa09c01337998 R10: 0000000000000003 R11: ffffffffb96da248 R12: fffffffffffffffe R13: ffff99820535bb28 R14: ffff99820b7bd000 R15: ffff99820381ea80 FS: 00007fe20aadabc0(0000) GS:ffff99827dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000559a120b502f CR3: 00000000055b6000 CR4: 00000000000006f0 Call Trace: ? create_pending_snapshot+0x1040/0x1190 [btrfs] ? __warn+0x81/0x130 ? create_pending_snapshot+0x1040/0x1190 [btrfs] ? report_bug+0x171/0x1a0 ? handle_bug+0x3a/0x70 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? create_pending_snapshot+0x1040/0x1190 [btrfs] ? create_pending_snapshot+0x1040/0x1190 [btrfs] create_pending_snapshots+0x92/0xc0 [btrfs] btrfs_commit_transaction+0x66b/0xf40 [btrfs] btrfs_mksubvol+0x301/0x4d0 [btrfs] btrfs_mksnapshot+0x80/0xb0 [btrfs] __btrfs_ioctl_snap_create+0x1c2/0x1d0 [btrfs] btrfs_ioctl_snap_create_v2+0xc4/0x150 [btrfs] btrfs_ioctl+0x8a6/0x2650 [btrfs] ? kmem_cache_free+0x22/0x340 ? do_sys_openat2+0x97/0xe0 __x64_sys_ioctl+0x97/0xd0 do_syscall_64+0x46/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 RIP: 0033:0x7fe20abe83af RSP: 002b:00007ffe6eff1360 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fe20abe83af RDX: 00007ffe6eff23c0 RSI: 0000000050009417 RDI: 0000000000000003 RBP: 0000000000000003 R08: 0000000000000000 R09: 00007fe20ad16cd0 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffe6eff13c0 R14: 00007fe20ad45000 R15: 0000559a120b6d58 ---[ end trace 0000000000000000 ]--- BTRFS: error (device vdc: state A) in create_pending_snapshot:1875: errno=-2 No such entry BTRFS info (device vdc: state EA): forced readonly BTRFS warning (device vdc: state EA): Skipping commit of aborted transaction. BTRFS: error (device vdc: state EA) in cleanup_transaction:2055: errno=-2 No such entry This happens because create_pending_snapshot() initializes the new root item as a copy of the source root item. This includes the refs field, which is 0 for a deleted subvolume. The call to btrfs_insert_root() therefore inserts a root with refs == 0. btrfs_get_new_fs_root() then finds the root and returns -ENOENT if refs == 0, which causes create_pending_snapshot() to abort. Fix it by checking the source root's refs before attempting the snapshot, but after locking subvol_sem to avoid racing with deletion. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Sweet Tea Dorminy Reviewed-by: Anand Jain Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8c656cb4295a..8516c70b5edc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -780,6 +780,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, return -EOPNOTSUPP; } + if (btrfs_root_refs(&root->root_item) == 0) + return -ENOENT; + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) return -EINVAL; From 4646445756ace2923ad4ffa1b7ae21211e9bec0c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 17 Jan 2024 18:59:44 +0100 Subject: [PATCH 121/187] rbd: don't move requests to the running list on errors commit ded080c86b3f99683774af0441a58fc2e3d60cae upstream. The running list is supposed to contain requests that are pinning the exclusive lock, i.e. those that must be flushed before exclusive lock is released. When wake_lock_waiters() is called to handle an error, requests on the acquiring list are failed with that error and no flushing takes place. Briefly moving them to the running list is not only pointless but also harmful: if exclusive lock gets acquired before all of their state machines are scheduled and go through rbd_lock_del_request(), we trigger rbd_assert(list_empty(&rbd_dev->running_list)); in rbd_try_acquire_lock(). Cc: stable@vger.kernel.org Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code") Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index afc92869cba4..f58ca9ce3503 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3453,14 +3453,15 @@ static bool rbd_lock_add_request(struct rbd_img_request *img_req) static void rbd_lock_del_request(struct rbd_img_request *img_req) { struct rbd_device *rbd_dev = img_req->rbd_dev; - bool need_wakeup; + bool need_wakeup = false; lockdep_assert_held(&rbd_dev->lock_rwsem); spin_lock(&rbd_dev->lock_lists_lock); - rbd_assert(!list_empty(&img_req->lock_item)); - list_del_init(&img_req->lock_item); - need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && - list_empty(&rbd_dev->running_list)); + if (!list_empty(&img_req->lock_item)) { + list_del_init(&img_req->lock_item); + need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && + list_empty(&rbd_dev->running_list)); + } spin_unlock(&rbd_dev->lock_lists_lock); if (need_wakeup) complete(&rbd_dev->releasing_wait); @@ -3843,14 +3844,19 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result) return; } - list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) { + while (!list_empty(&rbd_dev->acquiring_list)) { + img_req = list_first_entry(&rbd_dev->acquiring_list, + struct rbd_img_request, lock_item); mutex_lock(&img_req->state_mutex); rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK); + if (!result) + list_move_tail(&img_req->lock_item, + &rbd_dev->running_list); + else + list_del_init(&img_req->lock_item); rbd_img_schedule(img_req, result); mutex_unlock(&img_req->state_mutex); } - - list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list); } static bool locker_equal(const struct ceph_locker *lhs, From dcc54a54de544ae802d279847de0bb405ff07a19 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Mon, 22 Jan 2024 19:34:21 +0100 Subject: [PATCH 122/187] exec: Fix error handling in begin_new_exec() commit 84c39ec57d409e803a9bb6e4e85daf1243e0e80b upstream. If get_unused_fd_flags() fails, the error handling is incomplete because bprm->cred is already set to NULL, and therefore free_bprm will not unlock the cred_guard_mutex. Note there are two error conditions which end up here, one before and one after bprm->cred is cleared. Fixes: b8a61c9e7b4a ("exec: Generic execfd support") Signed-off-by: Bernd Edlinger Acked-by: Eric W. Biederman Link: https://lore.kernel.org/r/AS8P193MB128517ADB5EFF29E04389EDAE4752@AS8P193MB1285.EURP193.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/exec.c b/fs/exec.c index 283012eb1aeb..39f7751c90fc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1410,6 +1410,9 @@ int begin_new_exec(struct linux_binprm * bprm) out_unlock: up_write(&me->signal->exec_update_lock); + if (!bprm->cred) + mutex_unlock(&me->signal->cred_guard_mutex); + out: return retval; } From aa2cc9363926991ba74411e3aa0a0ea82c1ffe32 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 11 Jan 2024 15:07:25 +0200 Subject: [PATCH 123/187] wifi: iwlwifi: fix a memory corruption commit cf4a0d840ecc72fcf16198d5e9c505ab7d5a5e4d upstream. iwl_fw_ini_trigger_tlv::data is a pointer to a __le32, which means that if we copy to iwl_fw_ini_trigger_tlv::data + offset while offset is in bytes, we'll write past the buffer. Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218233 Fixes: cf29c5b66b9f ("iwlwifi: dbg_ini: implement time point handling") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111150610.2d2b8b870194.I14ed76505a5cf87304e0c9cc05cc0ae85ed3bf91@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index a1d34f3e7a9f..5979d904bbbd 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -1082,7 +1082,7 @@ static int iwl_dbg_tlv_override_trig_node(struct iwl_fw_runtime *fwrt, node_trig = (void *)node_tlv->data; } - memcpy(node_trig->data + offset, trig->data, trig_data_len); + memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len); node_tlv->length = cpu_to_le32(size); if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) { From 5e7d8ddf2a3889c02fc2e2201ce9e34d6c308591 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 22 Jan 2024 08:20:28 -0800 Subject: [PATCH 124/187] hv_netvsc: Calculate correct ring size when PAGE_SIZE is not 4 Kbytes commit 6941f67ad37d5465b75b9ffc498fcf6897a3c00e upstream. Current code in netvsc_drv_init() incorrectly assumes that PAGE_SIZE is 4 Kbytes, which is wrong on ARM64 with 16K or 64K page size. As a result, the default VMBus ring buffer size on ARM64 with 64K page size is 8 Mbytes instead of the expected 512 Kbytes. While this doesn't break anything, a typical VM with 8 vCPUs and 8 netvsc channels wastes 120 Mbytes (8 channels * 2 ring buffers/channel * 7.5 Mbytes/ring buffer). Unfortunately, the module parameter specifying the ring buffer size is in units of 4 Kbyte pages. Ideally, it should be in units that are independent of PAGE_SIZE, but backwards compatibility prevents changing that now. Fix this by having netvsc_drv_init() hardcode 4096 instead of using PAGE_SIZE when calculating the ring buffer size in bytes. Also use the VMBUS_RING_SIZE macro to ensure proper alignment when running with page size larger than 4K. Cc: # 5.15.x Fixes: 7aff79e297ee ("Drivers: hv: Enable Hyper-V code to be built on ARM64") Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240122162028.348885-1-mhklinux@outlook.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 0285894c892a..c1aac6ceb29e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -44,7 +44,7 @@ static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); -MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); +MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); unsigned int netvsc_ring_bytes __ro_after_init; static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | @@ -2801,7 +2801,7 @@ static int __init netvsc_drv_init(void) pr_info("Increased ring_size to %u (min allowed)\n", ring_size); } - netvsc_ring_bytes = ring_size * PAGE_SIZE; + netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096); register_netdevice_notifier(&netvsc_netdev_notifier); From af149a46890e8285d1618bd68b8d159bdb87fdb3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Jan 2024 10:56:26 +0100 Subject: [PATCH 125/187] netfilter: nft_chain_filter: handle NETDEV_UNREGISTER for inet/ingress basechain commit 01acb2e8666a6529697141a6017edbf206921913 upstream. Remove netdevice from inet/ingress basechain in case NETDEV_UNREGISTER event is reported, otherwise a stale reference to netdevice remains in the hook list. Fixes: 60a3815da702 ("netfilter: add inet ingress support") Cc: stable@vger.kernel.org Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_chain_filter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 680fe557686e..274b6f7e6bb5 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_base_chain *basechain; struct nftables_pernet *nft_net; - struct nft_table *table; struct nft_chain *chain, *nr; + struct nft_table *table; struct nft_ctx ctx = { .net = dev_net(dev), }; @@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_net = nft_pernet(ctx.net); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { - if (table->family != NFPROTO_NETDEV) + if (table->family != NFPROTO_NETDEV && + table->family != NFPROTO_INET) continue; ctx.family = table->family; @@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct notifier_block *this, if (!nft_is_base_chain(chain)) continue; + basechain = nft_base_chain(chain); + if (table->family == NFPROTO_INET && + basechain->ops.hooknum != NF_INET_INGRESS) + continue; + ctx.chain = chain; nft_netdev_event(event, dev, &ctx); } From 8e34430e33b8a80bc014f3efe29cac76bc30a4b4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 20 Jan 2024 22:50:04 +0100 Subject: [PATCH 126/187] netfilter: nf_tables: reject QUEUE/DROP verdict parameters commit f342de4e2f33e0e39165d8639387aa6c19dff660 upstream. This reverts commit e0abdadcc6e1. core.c:nf_hook_slow assumes that the upper 16 bits of NF_DROP verdicts contain a valid errno, i.e. -EPERM, -EHOSTUNREACH or similar, or 0. Due to the reverted commit, its possible to provide a positive value, e.g. NF_ACCEPT (1), which results in use-after-free. Its not clear to me why this commit was made. NF_QUEUE is not used by nftables; "queue" rules in nftables will result in use of "nft_queue" expression. If we later need to allow specifiying errno values from userspace (do not know why), this has to call NF_DROP_GETERR and check that "err <= 0" holds true. Fixes: e0abdadcc6e1 ("netfilter: nf_tables: accept QUEUE/DROP verdict parameters") Cc: stable@vger.kernel.org Reported-by: Notselwyn Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 75b21cd1b2c9..1edb2138260a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10448,16 +10448,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { - default: - switch (data->verdict.code & NF_VERDICT_MASK) { - case NF_ACCEPT: - case NF_DROP: - case NF_QUEUE: - break; - default: - return -EINVAL; - } - fallthrough; + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -10492,6 +10486,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.chain = chain; break; + default: + return -EINVAL; } desc->len = sizeof(data->verdict); From 2841631a03652f32b595c563695d0461072e0de4 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 8 Jan 2024 15:20:58 +0900 Subject: [PATCH 127/187] platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b upstream. p2sb_bar() unhides P2SB device to get resources from the device. It guards the operation by locking pci_rescan_remove_lock so that parallel rescans do not find the P2SB device. However, this lock causes deadlock when PCI bus rescan is triggered by /sys/bus/pci/rescan. The rescan locks pci_rescan_remove_lock and probes PCI devices. When PCI devices call p2sb_bar() during probe, it locks pci_rescan_remove_lock again. Hence the deadlock. To avoid the deadlock, do not lock pci_rescan_remove_lock in p2sb_bar(). Instead, do the lock at fs_initcall. Introduce p2sb_cache_resources() for fs_initcall which gets and caches the P2SB resources. At p2sb_bar(), refer the cache and return to the caller. Before operating the device at P2SB DEVFN for resource cache, check that its device class is PCI_CLASS_MEMORY_OTHER 0x0580 that PCH specifications define. This avoids unexpected operation to other devices at the same DEVFN. Link: https://lore.kernel.org/linux-pci/6xb24fjmptxxn5js2fjrrddjae6twex5bjaftwqsuawuqqqydx@7cl3uik5ef6j/ Fixes: 9745fb07474f ("platform/x86/intel: Add Primary to Sideband (P2SB) bridge support") Cc: stable@vger.kernel.org Suggested-by: Andy Shevchenko Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240108062059.3583028-2-shinichiro.kawasaki@wdc.com Reviewed-by: Andy Shevchenko Reviewed-by: Ilpo Järvinen Tested-by Klara Modin Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/p2sb.c | 202 ++++++++++++++++++++++++++---------- 1 file changed, 150 insertions(+), 52 deletions(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 1cf2471d54dd..17cc4b45e023 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -26,6 +26,21 @@ static const struct x86_cpu_id p2sb_cpu_ids[] = { {} }; +/* + * Cache BAR0 of P2SB device functions 0 to 7. + * TODO: The constant 8 is the number of functions that PCI specification + * defines. Same definitions exist tree-wide. Unify this definition and + * the other definitions then move to include/uapi/linux/pci.h. + */ +#define NR_P2SB_RES_CACHE 8 + +struct p2sb_res_cache { + u32 bus_dev_id; + struct resource res; +}; + +static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; + static int p2sb_get_devfn(unsigned int *devfn) { unsigned int fn = P2SB_DEVFN_DEFAULT; @@ -39,8 +54,16 @@ static int p2sb_get_devfn(unsigned int *devfn) return 0; } +static bool p2sb_valid_resource(struct resource *res) +{ + if (res->flags) + return true; + + return false; +} + /* Copy resource from the first BAR of the device in question */ -static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) +static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { struct resource *bar0 = &pdev->resource[0]; @@ -56,22 +79,108 @@ static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) mem->end = bar0->end; mem->flags = bar0->flags; mem->desc = bar0->desc; +} + +static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) +{ + struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; + struct pci_dev *pdev; + + pdev = pci_scan_single_device(bus, devfn); + if (!pdev) + return; + + p2sb_read_bar0(pdev, &cache->res); + cache->bus_dev_id = bus->dev.id; + + pci_stop_and_remove_bus_device(pdev); +} + +static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) +{ + unsigned int slot, fn; + + if (PCI_FUNC(devfn) == 0) { + /* + * When function number of the P2SB device is zero, scan it and + * other function numbers, and if devices are available, cache + * their BAR0s. + */ + slot = PCI_SLOT(devfn); + for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) + p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); + } else { + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + } + + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) + return -ENOENT; return 0; } -static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static struct pci_bus *p2sb_get_bus(struct pci_bus *bus) { - struct pci_dev *pdev; + static struct pci_bus *p2sb_bus; + + bus = bus ?: p2sb_bus; + if (bus) + return bus; + + /* Assume P2SB is on the bus 0 in domain 0 */ + p2sb_bus = pci_find_bus(0, 0); + return p2sb_bus; +} + +static int p2sb_cache_resources(void) +{ + unsigned int devfn_p2sb; + u32 value = P2SBC_HIDE; + struct pci_bus *bus; + u16 class; int ret; - pdev = pci_scan_single_device(bus, devfn); - if (!pdev) + /* Get devfn for P2SB device itself */ + ret = p2sb_get_devfn(&devfn_p2sb); + if (ret) + return ret; + + bus = p2sb_get_bus(NULL); + if (!bus) return -ENODEV; - ret = p2sb_read_bar0(pdev, mem); + /* + * When a device with same devfn exists and its device class is not + * PCI_CLASS_MEMORY_OTHER for P2SB, do not touch it. + */ + pci_bus_read_config_word(bus, devfn_p2sb, PCI_CLASS_DEVICE, &class); + if (!PCI_POSSIBLE_ERROR(class) && class != PCI_CLASS_MEMORY_OTHER) + return -ENODEV; + + /* + * Prevent concurrent PCI bus scan from seeing the P2SB device and + * removing via sysfs while it is temporarily exposed. + */ + pci_lock_rescan_remove(); + + /* + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. + * Unhide the P2SB device here, if needed. + */ + pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); + if (value & P2SBC_HIDE) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); + + ret = p2sb_scan_and_cache(bus, devfn_p2sb); + + /* Hide the P2SB device, if it was hidden */ + if (value & P2SBC_HIDE) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); + + pci_unlock_rescan_remove(); - pci_stop_and_remove_bus_device(pdev); return ret; } @@ -81,64 +190,53 @@ static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct re * @devfn: PCI slot and function to communicate with * @mem: memory resource to be filled in * - * The BIOS prevents the P2SB device from being enumerated by the PCI - * subsystem, so we need to unhide and hide it back to lookup the BAR. - * - * if @bus is NULL, the bus 0 in domain 0 will be used. + * If @bus is NULL, the bus 0 in domain 0 will be used. * If @devfn is 0, it will be replaced by devfn of the P2SB device. * * Caller must provide a valid pointer to @mem. * - * Locking is handled by pci_rescan_remove_lock mutex. - * * Return: * 0 on success or appropriate errno value on error. */ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) { - struct pci_dev *pdev_p2sb; - unsigned int devfn_p2sb; - u32 value = P2SBC_HIDE; + struct p2sb_res_cache *cache; int ret; - /* Get devfn for P2SB device itself */ - ret = p2sb_get_devfn(&devfn_p2sb); - if (ret) - return ret; - - /* if @bus is NULL, use bus 0 in domain 0 */ - bus = bus ?: pci_find_bus(0, 0); - - /* - * Prevent concurrent PCI bus scan from seeing the P2SB device and - * removing via sysfs while it is temporarily exposed. - */ - pci_lock_rescan_remove(); - - /* Unhide the P2SB device, if needed */ - pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); - - pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb); - if (devfn) - ret = p2sb_scan_and_read(bus, devfn, mem); - else - ret = p2sb_read_bar0(pdev_p2sb, mem); - pci_stop_and_remove_bus_device(pdev_p2sb); - - /* Hide the P2SB device, if it was hidden */ - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); - - pci_unlock_rescan_remove(); - - if (ret) - return ret; - - if (mem->flags == 0) + bus = p2sb_get_bus(bus); + if (!bus) return -ENODEV; + if (!devfn) { + ret = p2sb_get_devfn(&devfn); + if (ret) + return ret; + } + + cache = &p2sb_resources[PCI_FUNC(devfn)]; + if (cache->bus_dev_id != bus->dev.id) + return -ENODEV; + + if (!p2sb_valid_resource(&cache->res)) + return -ENOENT; + + memcpy(mem, &cache->res, sizeof(*mem)); return 0; } EXPORT_SYMBOL_GPL(p2sb_bar); + +static int __init p2sb_fs_init(void) +{ + p2sb_cache_resources(); + return 0; +} + +/* + * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can + * not be locked in sysfs pci bus rescan path because of deadlock. To + * avoid the deadlock, access to P2SB devices with the lock at an early + * step in kernel initialization and cache required resources. This + * should happen after subsys_initcall which initializes PCI subsystem + * and before device_initcall which requires P2SB resources. + */ +fs_initcall(p2sb_fs_init); From 2c939c74ef0b74e99b92e32edc2a59f9b9ca3d5a Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Sun, 21 Jan 2024 15:35:06 +0800 Subject: [PATCH 128/187] ksmbd: fix global oob in ksmbd_nl_policy commit ebeae8adf89d9a82359f6659b1663d09beec2faa upstream. Similar to a reported issue (check the commit b33fb5b801c6 ("net: qualcomm: rmnet: fix global oob in rmnet_policy"), my local fuzzer finds another global out-of-bounds read for policy ksmbd_nl_policy. See bug trace below: ================================================================== BUG: KASAN: global-out-of-bounds in validate_nla lib/nlattr.c:386 [inline] BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600 Read of size 1 at addr ffffffff8f24b100 by task syz-executor.1/62810 CPU: 0 PID: 62810 Comm: syz-executor.1 Tainted: G N 6.1.0 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x8b/0xb3 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x172/0x475 mm/kasan/report.c:395 kasan_report+0xbb/0x1c0 mm/kasan/report.c:495 validate_nla lib/nlattr.c:386 [inline] __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600 __nla_parse+0x3e/0x50 lib/nlattr.c:697 __nlmsg_parse include/net/netlink.h:748 [inline] genl_family_rcv_msg_attrs_parse.constprop.0+0x1b0/0x290 net/netlink/genetlink.c:565 genl_family_rcv_msg_doit+0xda/0x330 net/netlink/genetlink.c:734 genl_family_rcv_msg net/netlink/genetlink.c:833 [inline] genl_rcv_msg+0x441/0x780 net/netlink/genetlink.c:850 netlink_rcv_skb+0x14f/0x410 net/netlink/af_netlink.c:2540 genl_rcv+0x24/0x40 net/netlink/genetlink.c:861 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x54e/0x800 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x930/0xe50 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0x154/0x190 net/socket.c:734 ____sys_sendmsg+0x6df/0x840 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fdd66a8f359 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fdd65e00168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fdd66bbcf80 RCX: 00007fdd66a8f359 RDX: 0000000000000000 RSI: 0000000020000500 RDI: 0000000000000003 RBP: 00007fdd66ada493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc84b81aff R14: 00007fdd65e00300 R15: 0000000000022000 The buggy address belongs to the variable: ksmbd_nl_policy+0x100/0xa80 The buggy address belongs to the physical page: page:0000000034f47940 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1ccc4b flags: 0x200000000001000(reserved|node=0|zone=2) raw: 0200000000001000 ffffea00073312c8 ffffea00073312c8 0000000000000000 raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffffffff8f24b000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffffffff8f24b080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffffffff8f24b100: f9 f9 f9 f9 00 00 f9 f9 f9 f9 f9 f9 00 00 07 f9 ^ ffffffff8f24b180: f9 f9 f9 f9 00 05 f9 f9 f9 f9 f9 f9 00 00 00 05 ffffffff8f24b200: f9 f9 f9 f9 00 00 03 f9 f9 f9 f9 f9 00 00 04 f9 ================================================================== To fix it, add a placeholder named __KSMBD_EVENT_MAX and let KSMBD_EVENT_MAX to be its original value - 1 according to what other netlink families do. Also change two sites that refer the KSMBD_EVENT_MAX to correct value. Cc: stable@vger.kernel.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Lin Ma Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/ksmbd_netlink.h | 3 ++- fs/smb/server/transport_ipc.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index b7521e41402e..0ebf91ffa236 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -304,7 +304,8 @@ enum ksmbd_event { KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST, KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15, - KSMBD_EVENT_MAX + __KSMBD_EVENT_MAX, + KSMBD_EVENT_MAX = __KSMBD_EVENT_MAX - 1 }; /* diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index b49d47bdafc9..f29bb03f0dc4 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -74,7 +74,7 @@ static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info) static int handle_generic_event(struct sk_buff *skb, struct genl_info *info); static int ksmbd_ipc_heartbeat_request(void); -static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX] = { +static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = { [KSMBD_EVENT_UNSPEC] = { .len = 0, }, @@ -403,7 +403,7 @@ static int handle_generic_event(struct sk_buff *skb, struct genl_info *info) return -EPERM; #endif - if (type >= KSMBD_EVENT_MAX) { + if (type > KSMBD_EVENT_MAX) { WARN_ON(1); return -EINVAL; } From 7f95f6997f4fdd17abec3200cae45420a5489350 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 20 Dec 2023 17:21:12 +0000 Subject: [PATCH 129/187] firmware: arm_scmi: Check mailbox/SMT channel for consistency commit 437a310b22244d4e0b78665c3042e5d1c0f45306 upstream. On reception of a completion interrupt the shared memory area is accessed to retrieve the message header at first and then, if the message sequence number identifies a transaction which is still pending, the related payload is fetched too. When an SCMI command times out the channel ownership remains with the platform until eventually a late reply is received and, as a consequence, any further transmission attempt remains pending, waiting for the channel to be relinquished by the platform. Once that late reply is received the channel ownership is given back to the agent and any pending request is then allowed to proceed and overwrite the SMT area of the just delivered late reply; then the wait for the reply to the new request starts. It has been observed that the spurious IRQ related to the late reply can be wrongly associated with the freshly enqueued request: when that happens the SCMI stack in-flight lookup procedure is fooled by the fact that the message header now present in the SMT area is related to the new pending transaction, even though the real reply has still to arrive. This race-condition on the A2P channel can be detected by looking at the channel status bits: a genuine reply from the platform will have set the channel free bit before triggering the completion IRQ. Add a consistency check to validate such condition in the A2P ISR. Reported-by: Xinglong Yang Closes: https://lore.kernel.org/all/PUZPR06MB54981E6FA00D82BFDBB864FBF08DA@PUZPR06MB5498.apcprd06.prod.outlook.com/ Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Cc: stable@vger.kernel.org # 5.15+ Signed-off-by: Cristian Marussi Tested-by: Xinglong Yang Link: https://lore.kernel.org/r/20231220172112.763539-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/arm_scmi/common.h | 1 + drivers/firmware/arm_scmi/mailbox.c | 14 ++++++++++++++ drivers/firmware/arm_scmi/shmem.c | 6 ++++++ 3 files changed, 21 insertions(+) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index a1c0154c31c6..4ac72754c255 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -244,6 +244,7 @@ void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem, void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem); bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, struct scmi_xfer *xfer); +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem); /* declarations for message passing transports */ struct scmi_msg_payld; diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 25d31dfdad15..81c902672a30 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -43,6 +43,20 @@ static void rx_callback(struct mbox_client *cl, void *m) { struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl); + /* + * An A2P IRQ is NOT valid when received while the platform still has + * the ownership of the channel, because the platform at first releases + * the SMT channel and then sends the completion interrupt. + * + * This addresses a possible race condition in which a spurious IRQ from + * a previous timed-out reply which arrived late could be wrongly + * associated with the next pending transaction. + */ + if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) { + dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n"); + return; + } + scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL); } diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 87b4f4d35f06..517d52fb3bcb 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, (SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR | SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); } + +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem) +{ + return (ioread32(&shmem->channel_status) & + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); +} From 6c495c84e24ef05b99bda7e3a6c56273c323c959 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 16 Jan 2024 15:33:07 +1100 Subject: [PATCH 130/187] xfs: read only mounts with fsopen mount API are busted commit d8d222e09dab84a17bb65dda4b94d01c565f5327 upstream. Recently xfs/513 started failing on my test machines testing "-o ro,norecovery" mount options. This was being emitted in dmesg: [ 9906.932724] XFS (pmem0): no-recovery mounts must be read-only. Turns out, readonly mounts with the fsopen()/fsconfig() mount API have been busted since day zero. It's only taken 5 years for debian unstable to start using this "new" mount API, and shortly after this I noticed xfs/513 had started to fail as per above. The syscall trace is: fsopen("xfs", FSOPEN_CLOEXEC) = 3 mount_setattr(-1, NULL, 0, NULL, 0) = -1 EINVAL (Invalid argument) ..... fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/pmem0", 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "norecovery", NULL, 0) = 0 fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = -1 EINVAL (Invalid argument) close(3) = 0 Showing that the actual mount instantiation (FSCONFIG_CMD_CREATE) is what threw out the error. During mount instantiation, we call xfs_fs_validate_params() which does: /* No recovery flag requires a read-only mount */ if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { xfs_warn(mp, "no-recovery mounts must be read-only."); return -EINVAL; } and xfs_is_readonly() checks internal mount flags for read only state. This state is set in xfs_init_fs_context() from the context superblock flag state: /* * Copy binary VFS mount flags we are interested in. */ if (fc->sb_flags & SB_RDONLY) set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); With the old mount API, all of the VFS specific superblock flags had already been parsed and set before xfs_init_fs_context() is called, so this all works fine. However, in the brave new fsopen/fsconfig world, xfs_init_fs_context() is called from fsopen() context, before any VFS superblock have been set or parsed. Hence if we use fsopen(), the internal XFS readonly state is *never set*. Hence anything that depends on xfs_is_readonly() actually returning true for read only mounts is broken if fsopen() has been used to mount the filesystem. Fix this by moving this internal state initialisation to xfs_fs_fill_super() before we attempt to validate the parameters that have been set prior to the FSCONFIG_CMD_CREATE call being made. Signed-off-by: Dave Chinner Fixes: 73e5fff98b64 ("xfs: switch to use the new mount-api") cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_super.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 4b179526913f..12662b169b71 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1458,6 +1458,18 @@ xfs_fs_fill_super( mp->m_super = sb; + /* + * Copy VFS mount flags from the context now that all parameter parsing + * is guaranteed to have been completed by either the old mount API or + * the newer fsopen/fsconfig API. + */ + if (fc->sb_flags & SB_RDONLY) + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + if (fc->sb_flags & SB_DIRSYNC) + mp->m_features |= XFS_FEAT_DIRSYNC; + if (fc->sb_flags & SB_SYNCHRONOUS) + mp->m_features |= XFS_FEAT_WSYNC; + error = xfs_fs_validate_params(mp); if (error) goto out_free_names; @@ -1915,6 +1927,11 @@ static const struct fs_context_operations xfs_context_ops = { .free = xfs_fs_free, }; +/* + * WARNING: do not initialise any parameters in this function that depend on + * mount option parsing having already been performed as this can be called from + * fsopen() before any parameters have been set. + */ static int xfs_init_fs_context( struct fs_context *fc) { @@ -1947,16 +1964,6 @@ static int xfs_init_fs_context( mp->m_logbsize = -1; mp->m_allocsize_log = 16; /* 64k */ - /* - * Copy binary VFS mount flags we are interested in. - */ - if (fc->sb_flags & SB_RDONLY) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); - if (fc->sb_flags & SB_DIRSYNC) - mp->m_features |= XFS_FEAT_DIRSYNC; - if (fc->sb_flags & SB_SYNCHRONOUS) - mp->m_features |= XFS_FEAT_WSYNC; - fc->s_fs_info = mp; fc->ops = &xfs_context_ops; From 242996f50065b446054efc808c51ffece723422a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 17 Jan 2024 08:29:42 -0600 Subject: [PATCH 131/187] gpiolib: acpi: Ignore touchpad wakeup on GPD G1619-04 commit 805c74eac8cb306dc69b87b6b066ab4da77ceaf1 upstream. Spurious wakeups are reported on the GPD G1619-04 which can be absolved by programming the GPIO to ignore wakeups. Cc: stable@vger.kernel.org Reported-and-tested-by: George Melikov Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3073 Signed-off-by: Mario Limonciello Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-acpi.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 787d47e667ad..33d73687e463 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1626,6 +1626,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_wake = "ELAN0415:00@9", }, }, + { + /* + * Spurious wakeups from TP_ATTN# pin + * Found in BIOS 0.35 + * https://gitlab.freedesktop.org/drm/amd/-/issues/3073 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "PNP0C50:00@8", + }, + }, {} /* Terminating entry */ }; From 635e996e6e3b1b2f1356a9aecac3cac66fbc4702 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 22 Jan 2024 15:18:11 +0100 Subject: [PATCH 132/187] cpufreq: intel_pstate: Refine computation of P-state for given frequency commit 192cdb1c907fd8df2d764c5bb17496e415e59391 upstream. On systems using HWP, if a given frequency is equal to the maximum turbo frequency or the maximum non-turbo frequency, the HWP performance level corresponding to it is already known and can be used directly without any computation. Accordingly, adjust the code to use the known HWP performance levels in the cases mentioned above. This also helps to avoid limiting CPU capacity artificially in some cases when the BIOS produces the HWP_CAP numbers using a different E-core-to-P-core performance scaling factor than expected by the kernel. Fixes: f5c8cf2a4992 ("cpufreq: intel_pstate: hybrid: Use known scaling factor for P-cores") Cc: 6.1+ # 6.1+ Tested-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 55 +++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fbe3a4098743..abdd26f7d04c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -493,6 +493,30 @@ static inline int intel_pstate_get_cppc_guaranteed(int cpu) } #endif /* CONFIG_ACPI_CPPC_LIB */ +static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq, + unsigned int relation) +{ + if (freq == cpu->pstate.turbo_freq) + return cpu->pstate.turbo_pstate; + + if (freq == cpu->pstate.max_freq) + return cpu->pstate.max_pstate; + + switch (relation) { + case CPUFREQ_RELATION_H: + return freq / cpu->pstate.scaling; + case CPUFREQ_RELATION_C: + return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling); + } + + return DIV_ROUND_UP(freq, cpu->pstate.scaling); +} + +static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq) +{ + return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L); +} + /** * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels. * @cpu: Target CPU. @@ -510,6 +534,7 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu); int scaling = cpu->pstate.scaling; + int freq; pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); @@ -523,16 +548,16 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, perf_ctl_scaling); - cpu->pstate.max_pstate_physical = - DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling, - scaling); + freq = perf_ctl_max_phys * perf_ctl_scaling; + cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq); - cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling; + freq = cpu->pstate.min_pstate * perf_ctl_scaling; + cpu->pstate.min_freq = freq; /* * Cast the min P-state value retrieved via pstate_funcs.get_min() to * the effective range of HWP performance levels. */ - cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling); + cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq); } static inline void update_turbo_state(void) @@ -2493,13 +2518,12 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, * abstract values to represent performance rather than pure ratios. */ if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) { - int scaling = cpu->pstate.scaling; int freq; freq = max_policy_perf * perf_ctl_scaling; - max_policy_perf = DIV_ROUND_UP(freq, scaling); + max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); freq = min_policy_perf * perf_ctl_scaling; - min_policy_perf = DIV_ROUND_UP(freq, scaling); + min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); } pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", @@ -2873,18 +2897,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, cpufreq_freq_transition_begin(policy, &freqs); - switch (relation) { - case CPUFREQ_RELATION_L: - target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling); - break; - case CPUFREQ_RELATION_H: - target_pstate = freqs.new / cpu->pstate.scaling; - break; - default: - target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling); - break; - } - + target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false); freqs.new = target_pstate * cpu->pstate.scaling; @@ -2902,7 +2915,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, update_turbo_state(); - target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); + target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); From 62f2e79cf9f4f47cc9dea9cebdf58d9f7b5695e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 11 Dec 2023 10:16:24 +0200 Subject: [PATCH 133/187] drm: Don't unref the same fb many times by mistake due to deadlock handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cb4daf271302d71a6b9a7c01bd0b6d76febd8f0c upstream. If we get a deadlock after the fb lookup in drm_mode_page_flip_ioctl() we proceed to unref the fb and then retry the whole thing from the top. But we forget to reset the fb pointer back to NULL, and so if we then get another error during the retry, before the fb lookup, we proceed the unref the same fb again without having gotten another reference. The end result is that the fb will (eventually) end up being freed while it's still in use. Reset fb to NULL once we've unreffed it to avoid doing it again until we've done another fb lookup. This turned out to be pretty easy to hit on a DG2 when doing async flips (and CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y). The first symptom I saw that drm_closefb() simply got stuck in a busy loop while walking the framebuffer list. Fortunately I was able to convince it to oops instead, and from there it was easier to track down the culprit. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211081625.25704-1-ville.syrjala@linux.intel.com Acked-by: Javier Martinez Canillas Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 33357629a7f5..7a4af43e2cd1 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1382,6 +1382,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, out: if (fb) drm_framebuffer_put(fb); + fb = NULL; if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; From 2a81e844d1ffabdf4f9902b83dca35ea86895eab Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Dec 2023 15:29:00 +0300 Subject: [PATCH 134/187] drm/bridge: nxp-ptn3460: fix i2c_master_send() error checking commit 914437992876838662c968cb416f832110fb1093 upstream. The i2c_master_send/recv() functions return negative error codes or the number of bytes that were able to be sent/received. This code has two problems. 1) Instead of checking if all the bytes were sent or received, it checks that at least one byte was sent or received. 2) If there was a partial send/receive then we should return a negative error code but this code returns success. Fixes: a9fe713d7d45 ("drm/bridge: Add PTN3460 bridge driver") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/0cdc2dce-ca89-451a-9774-1482ab2f4762@moroto.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/nxp-ptn3460.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index 0851101a8c72..db5ec89366a2 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -56,13 +56,13 @@ static int ptn3460_read_bytes(struct ptn3460_bridge *ptn_bridge, char addr, ret = i2c_master_send(ptn_bridge->client, &addr, 1); if (ret <= 0) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); - return ret; + return ret ?: -EIO; } ret = i2c_master_recv(ptn_bridge->client, buf, len); - if (ret <= 0) { + if (ret != len) { DRM_ERROR("Failed to recv i2c data, ret=%d\n", ret); - return ret; + return ret < 0 ? ret : -EIO; } return 0; @@ -78,9 +78,9 @@ static int ptn3460_write_byte(struct ptn3460_bridge *ptn_bridge, char addr, buf[1] = val; ret = i2c_master_send(ptn_bridge->client, buf, ARRAY_SIZE(buf)); - if (ret <= 0) { + if (ret != ARRAY_SIZE(buf)) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); - return ret; + return ret < 0 ? ret : -EIO; } return 0; From b5fcb340b7b20fc531b32aec27651c0e9f97daff Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 9 Nov 2023 09:38:03 +0200 Subject: [PATCH 135/187] drm/tidss: Fix atomic_flush check commit 95d4b471953411854f9c80b568da7fcf753f3801 upstream. tidss_crtc_atomic_flush() checks if the crtc is enabled, and if not, returns immediately as there's no reason to do any register changes. However, the code checks for 'crtc->state->enable', which does not reflect the actual HW state. We should instead look at the 'crtc->state->active' flag. This causes the tidss_crtc_atomic_flush() to proceed with the flush even if the active state is false, which then causes us to hit the WARN_ON(!crtc->state->event) check. Fix this by checking the active flag, and while at it, fix the related debug print which had "active" and "needs modeset" wrong way. Cc: Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem") Reviewed-by: Aradhya Bhatia Link: https://lore.kernel.org/r/20231109-tidss-probe-v2-10-ac91b5ea35c0@ideasonboard.com Signed-off-by: Tomi Valkeinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/tidss/tidss_crtc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c index cd3c43a6c806..cb66a425dd20 100644 --- a/drivers/gpu/drm/tidss/tidss_crtc.c +++ b/drivers/gpu/drm/tidss/tidss_crtc.c @@ -170,13 +170,13 @@ static void tidss_crtc_atomic_flush(struct drm_crtc *crtc, struct tidss_device *tidss = to_tidss(ddev); unsigned long flags; - dev_dbg(ddev->dev, - "%s: %s enabled %d, needs modeset %d, event %p\n", __func__, - crtc->name, drm_atomic_crtc_needs_modeset(crtc->state), - crtc->state->enable, crtc->state->event); + dev_dbg(ddev->dev, "%s: %s is %sactive, %s modeset, event %p\n", + __func__, crtc->name, crtc->state->active ? "" : "not ", + drm_atomic_crtc_needs_modeset(crtc->state) ? "needs" : "doesn't need", + crtc->state->event); /* There is nothing to do if CRTC is not going to be enabled. */ - if (!crtc->state->enable) + if (!crtc->state->active) return; /* From 85d16c03ddd3f9b10212674e16ee8662b185bd14 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 19 Jun 2023 15:04:24 -0500 Subject: [PATCH 136/187] drm/amd/display: Disable PSR-SU on Parade 0803 TCON again commit 571c2fa26aa654946447c282a09d40a56c7ff128 upstream. When screen brightness is rapidly changed and PSR-SU is enabled the display hangs on panels with this TCON even on the latest DCN 3.1.4 microcode (0x8002a81 at this time). This was disabled previously as commit 072030b17830 ("drm/amd: Disable PSR-SU on Parade 0803 TCON") but reverted as commit 1e66a17ce546 ("Revert "drm/amd: Disable PSR-SU on Parade 0803 TCON"") in favor of testing for a new enough microcode (commit cd2e31a9ab93 ("drm/amd/display: Set minimum requirement for using PSR-SU on Phoenix")). As hangs are still happening specifically with this TCON, disable PSR-SU again for it until it can be root caused. Cc: stable@vger.kernel.org Cc: aaron.ma@canonical.com Cc: binli@gnome.org Cc: Marc Rossi Cc: Hamza Mahfooz Signed-off-by: Mario Limonciello Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2046131 Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 67287ad07226..e2f436dea565 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -818,6 +818,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) isPSRSUSupported = false; else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } From 6341140b044552fc4b92163f468d81545eb3d062 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 4 Jan 2024 15:59:03 -0700 Subject: [PATCH 137/187] platform/x86: intel-uncore-freq: Fix types in sysfs callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 416de0246f35f43d871a57939671fe814f4455ee upstream. When booting a kernel with CONFIG_CFI_CLANG, there is a CFI failure when accessing any of the values under /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00: $ cat /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/max_freq_khz fish: Job 1, 'cat /sys/devices/system/cpu/int…' terminated by signal SIGSEGV (Address boundary error) $ sudo dmesg &| grep 'CFI failure' [ 170.953925] CFI failure at kobj_attr_show+0x19/0x30 (target: show_max_freq_khz+0x0/0xc0 [intel_uncore_frequency_common]; expected type: 0xd34078c5 The sysfs callback functions such as show_domain_id() are written as if they are going to be called by dev_attr_show() but as the above message shows, they are instead called by kobj_attr_show(). kCFI checks that the destination of an indirect jump has the exact same type as the prototype of the function pointer it is called through and fails when they do not. These callbacks are called through kobj_attr_show() because uncore_root_kobj was initialized with kobject_create_and_add(), which means uncore_root_kobj has a ->sysfs_ops of kobj_sysfs_ops from kobject_create(), which uses kobj_attr_show() as its ->show() value. The only reason there has not been a more noticeable problem until this point is that 'struct kobj_attribute' and 'struct device_attribute' have the same layout, so getting the callback from container_of() works the same with either value. Change all the callbacks and their uses to be compatible with kobj_attr_show() and kobj_attr_store(), which resolves the kCFI failure and allows the sysfs files to work properly. Closes: https://github.com/ClangBuiltLinux/linux/issues/1974 Fixes: ae7b2ce57851 ("platform/x86/intel/uncore-freq: Use sysfs API to create attributes") Cc: stable@vger.kernel.org Signed-off-by: Nathan Chancellor Reviewed-by: Sami Tolvanen Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20240104-intel-uncore-freq-kcfi-fix-v1-1-bf1e8939af40@kernel.org Signed-off-by: Hans de Goede [nathan: Fix conflicts due to lack of 9b8dea80e3cb in 6.1] Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- .../uncore-frequency-common.c | 64 +++++++++---------- .../uncore-frequency-common.h | 20 +++--- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 9b12fe8e95c9..dd2e654daf4b 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -74,30 +74,30 @@ static ssize_t show_perf_status_freq_khz(struct uncore_data *data, char *buf) } #define store_uncore_min_max(name, min_max) \ - static ssize_t store_##name(struct device *dev, \ - struct device_attribute *attr, \ + static ssize_t store_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, \ const char *buf, size_t count) \ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return store_min_max_freq_khz(data, buf, count, \ min_max); \ } #define show_uncore_min_max(name, min_max) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_min_max_freq_khz(data, buf, min_max); \ } #define show_uncore_perf_status(name) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_perf_status_freq_khz(data, buf); \ } @@ -111,11 +111,11 @@ show_uncore_min_max(max_freq_khz, 1); show_uncore_perf_status(current_freq_khz); #define show_uncore_data(member_name) \ - static ssize_t show_##member_name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##member_name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ struct uncore_data *data = container_of(attr, struct uncore_data,\ - member_name##_dev_attr);\ + member_name##_kobj_attr);\ \ return sysfs_emit(buf, "%u\n", \ data->member_name); \ @@ -126,29 +126,29 @@ show_uncore_data(initial_max_freq_khz); #define init_attribute_rw(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = store_##_name; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0644; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = store_##_name; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0644; \ } while (0) #define init_attribute_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0444; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0444; \ } while (0) #define init_attribute_root_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0400; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0400; \ } while (0) static int create_attr_group(struct uncore_data *data, char *name) @@ -161,14 +161,14 @@ static int create_attr_group(struct uncore_data *data, char *name) init_attribute_ro(initial_max_freq_khz); init_attribute_root_ro(current_freq_khz); - data->uncore_attrs[index++] = &data->max_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->max_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_max_freq_khz_kobj_attr.attr; ret = uncore_read_freq(data, &freq); if (!ret) - data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->current_freq_khz_kobj_attr.attr; data->uncore_attrs[index] = NULL; diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h index f5dcfa2fb285..2d9dc3151d6e 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h @@ -23,11 +23,11 @@ * @die_id: Die id for this instance * @name: Sysfs entry name for this instance * @uncore_attr_group: Attribute group storage - * @max_freq_khz_dev_attr: Storage for device attribute max_freq_khz - * @mix_freq_khz_dev_attr: Storage for device attribute min_freq_khz - * @initial_max_freq_khz_dev_attr: Storage for device attribute initial_max_freq_khz - * @initial_min_freq_khz_dev_attr: Storage for device attribute initial_min_freq_khz - * @current_freq_khz_dev_attr: Storage for device attribute current_freq_khz + * @max_freq_khz_dev_attr: Storage for kobject attribute max_freq_khz + * @mix_freq_khz_dev_attr: Storage for kobject attribute min_freq_khz + * @initial_max_freq_khz_dev_attr: Storage for kobject attribute initial_max_freq_khz + * @initial_min_freq_khz_dev_attr: Storage for kobject attribute initial_min_freq_khz + * @current_freq_khz_dev_attr: Storage for kobject attribute current_freq_khz * @uncore_attrs: Attribute storage for group creation * * This structure is used to encapsulate all data related to uncore sysfs @@ -44,11 +44,11 @@ struct uncore_data { char name[32]; struct attribute_group uncore_attr_group; - struct device_attribute max_freq_khz_dev_attr; - struct device_attribute min_freq_khz_dev_attr; - struct device_attribute initial_max_freq_khz_dev_attr; - struct device_attribute initial_min_freq_khz_dev_attr; - struct device_attribute current_freq_khz_dev_attr; + struct kobj_attribute max_freq_khz_kobj_attr; + struct kobj_attribute min_freq_khz_kobj_attr; + struct kobj_attribute initial_max_freq_khz_kobj_attr; + struct kobj_attribute initial_min_freq_khz_kobj_attr; + struct kobj_attribute current_freq_khz_kobj_attr; struct attribute *uncore_attrs[6]; }; From 7960f14fcad02275cfd182ec17503639244074a9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Dec 2023 18:05:15 +0300 Subject: [PATCH 138/187] drm/bridge: nxp-ptn3460: simplify some error checking commit 28d3d0696688154cc04983f343011d07bf0508e4 upstream. The i2c_master_send/recv() functions return negative error codes or they return "len" on success. So the error handling here can be written as just normal checks for "if (ret < 0) return ret;". No need to complicate things. Btw, in this code the "len" parameter can never be zero, but even if it were, then I feel like this would still be the best way to write it. Fixes: 914437992876 ("drm/bridge: nxp-ptn3460: fix i2c_master_send() error checking") Suggested-by: Neil Armstrong Signed-off-by: Dan Carpenter Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/04242630-42d8-4920-8c67-24ac9db6b3c9@moroto.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/nxp-ptn3460.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index db5ec89366a2..8686b20ac317 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -54,15 +54,15 @@ static int ptn3460_read_bytes(struct ptn3460_bridge *ptn_bridge, char addr, int ret; ret = i2c_master_send(ptn_bridge->client, &addr, 1); - if (ret <= 0) { + if (ret < 0) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); - return ret ?: -EIO; + return ret; } ret = i2c_master_recv(ptn_bridge->client, buf, len); - if (ret != len) { + if (ret < 0) { DRM_ERROR("Failed to recv i2c data, ret=%d\n", ret); - return ret < 0 ? ret : -EIO; + return ret; } return 0; @@ -78,9 +78,9 @@ static int ptn3460_write_byte(struct ptn3460_bridge *ptn_bridge, char addr, buf[1] = val; ret = i2c_master_send(ptn_bridge->client, buf, ARRAY_SIZE(buf)); - if (ret != ARRAY_SIZE(buf)) { + if (ret < 0) { DRM_ERROR("Failed to send i2c command, ret=%d\n", ret); - return ret < 0 ? ret : -EIO; + return ret; } return 0; From b4cbd018322bf5af328100442e8177b544e3dfbb Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 15 Dec 2023 11:01:42 -0500 Subject: [PATCH 139/187] drm/amd/display: Port DENTIST hang and TDR fixes to OTG disable W/A commit 4b56f7d47be87cde5f368b67bc7fac53a2c3e8d2 upstream. [Why] We can experience DENTIST hangs during optimize_bandwidth or TDRs if FIFO is toggled and hangs. [How] Port the DCN35 fixes to DCN314. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index e43b4d7dc60e..c8136a05a8d3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -131,30 +131,27 @@ static int dcn314_get_active_display_cnt_wa( return display_count; } -static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) +static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { - struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; - if (disable) { - if (stream_enc && stream_enc->funcs->disable_fifo) - pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); - pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); - - if (stream_enc && stream_enc->funcs->enable_fifo) - pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc); } } } @@ -254,11 +251,11 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn314_disable_otg_wa(clk_mgr_base, context, true); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn314_disable_otg_wa(clk_mgr_base, context, false); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } From 471ab2e8b7e1cfafcc779479dc5ca20a0578986a Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 17 Jan 2024 14:35:29 +0800 Subject: [PATCH 140/187] drm/amdgpu/pm: Fix the power source flag error commit ca1ffb174f16b699c536734fc12a4162097c49f4 upstream. The power source flag should be updated when [1] System receives an interrupt indicating that the power source has changed. [2] System resumes from suspend or runtime suspend Signed-off-by: Ma Jun Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 13 +++---------- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 ++ 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 47ff3694ffa5..1d0693dad818 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -24,6 +24,7 @@ #include #include +#include #include #include "amdgpu.h" @@ -731,16 +732,8 @@ static int smu_late_init(void *handle) * handle the switch automatically. Driver involvement * is unnecessary. */ - if (!smu->dc_controlled_by_gpio) { - ret = smu_set_power_source(smu, - adev->pm.ac_power ? SMU_POWER_SOURCE_AC : - SMU_POWER_SOURCE_DC); - if (ret) { - dev_err(adev->dev, "Failed to switch to %s mode!\n", - adev->pm.ac_power ? "AC" : "DC"); - return ret; - } - } + adev->pm.ac_power = power_supply_is_system_supplied() > 0; + smu_set_ac_dc(smu); if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 1)) || (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 3))) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index d490b571c8ff..2e061a74a0b7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1467,10 +1467,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = false; break; case 0x7: /* diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 1b0fb93539ec..f3257cf4b06f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1415,10 +1415,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = false; break; case 0x7: /* From 2197389e1a8aa23a57c485eb4cddcc9c759f1ed2 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 28 Jun 2023 00:12:39 +0800 Subject: [PATCH 141/187] erofs: get rid of the remaining kmap_atomic() [ Upstream commit 123ec246ebe323d468c5ca996700ea4739d20ddf ] It's unnecessary to use kmap_atomic() compared with kmap_local_page(). In addition, kmap_atomic() is deprecated now. Signed-off-by: Gao Xiang Reviewed-by: Yue Hu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20230627161240.331-1-hsiangkao@linux.alibaba.com Signed-off-by: Gao Xiang Stable-dep-of: 3c12466b6b7b ("erofs: fix lz4 inplace decompression") Signed-off-by: Sasha Levin --- fs/erofs/decompressor.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 51b7ac7166d9..59294182e4cb 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -148,7 +148,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, *maptype = 0; return inpage; } - kunmap_atomic(inpage); + kunmap_local(inpage); might_sleep(); src = erofs_vm_map_ram(rq->in, ctx->inpages); if (!src) @@ -162,7 +162,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, src = erofs_get_pcpubuf(ctx->inpages); if (!src) { DBG_BUGON(1); - kunmap_atomic(inpage); + kunmap_local(inpage); return ERR_PTR(-EFAULT); } @@ -173,9 +173,9 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, min_t(unsigned int, total, PAGE_SIZE - *inputmargin); if (!inpage) - inpage = kmap_atomic(*in); + inpage = kmap_local_page(*in); memcpy(tmp, inpage + *inputmargin, page_copycnt); - kunmap_atomic(inpage); + kunmap_local(inpage); inpage = NULL; tmp += page_copycnt; total -= page_copycnt; @@ -214,7 +214,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, int ret, maptype; DBG_BUGON(*rq->in == NULL); - headpage = kmap_atomic(*rq->in); + headpage = kmap_local_page(*rq->in); /* LZ4 decompression inplace is only safe if zero_padding is enabled */ if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) { @@ -223,7 +223,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, min_t(unsigned int, rq->inputsize, EROFS_BLKSIZ - rq->pageofs_in)); if (ret) { - kunmap_atomic(headpage); + kunmap_local(headpage); return ret; } may_inplace = !((rq->pageofs_in + rq->inputsize) & @@ -261,7 +261,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } if (maptype == 0) { - kunmap_atomic(headpage); + kunmap_local(headpage); } else if (maptype == 1) { vm_unmap_ram(src, ctx->inpages); } else if (maptype == 2) { @@ -289,7 +289,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, /* one optimized fast path only for non bigpcluster cases yet */ if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) { DBG_BUGON(!*rq->out); - dst = kmap_atomic(*rq->out); + dst = kmap_local_page(*rq->out); dst_maptype = 0; goto dstmap_out; } @@ -311,7 +311,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, dstmap_out: ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out); if (!dst_maptype) - kunmap_atomic(dst); + kunmap_local(dst); else if (dst_maptype == 2) vm_unmap_ram(dst, ctx.outpages); return ret; From 33bf23c9940dbd3a22aad7f0cda4c84ed5701847 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 6 Dec 2023 12:55:34 +0800 Subject: [PATCH 142/187] erofs: fix lz4 inplace decompression [ Upstream commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de ] Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable # 5.4+ Tested-by: Yifan Zhao Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Sasha Levin --- fs/erofs/decompressor.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 59294182e4cb..0cfad74374ca 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -122,11 +122,11 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, } static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, - void *inpage, unsigned int *inputmargin, int *maptype, - bool may_inplace) + void *inpage, void *out, unsigned int *inputmargin, + int *maptype, bool may_inplace) { struct z_erofs_decompress_req *rq = ctx->rq; - unsigned int omargin, total, i, j; + unsigned int omargin, total, i; struct page **in; void *src, *tmp; @@ -136,12 +136,13 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) goto docopy; - for (i = 0; i < ctx->inpages; ++i) { - DBG_BUGON(rq->in[i] == NULL); - for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j) - if (rq->out[j] == rq->in[i]) - goto docopy; - } + for (i = 0; i < ctx->inpages; ++i) + if (rq->out[ctx->outpages - ctx->inpages + i] != + rq->in[i]) + goto docopy; + kunmap_local(inpage); + *maptype = 3; + return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT); } if (ctx->inpages <= 1) { @@ -149,7 +150,6 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, return inpage; } kunmap_local(inpage); - might_sleep(); src = erofs_vm_map_ram(rq->in, ctx->inpages); if (!src) return ERR_PTR(-ENOMEM); @@ -205,12 +205,12 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, } static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, - u8 *out) + u8 *dst) { struct z_erofs_decompress_req *rq = ctx->rq; bool support_0padding = false, may_inplace = false; unsigned int inputmargin; - u8 *headpage, *src; + u8 *out, *headpage, *src; int ret, maptype; DBG_BUGON(*rq->in == NULL); @@ -231,11 +231,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } inputmargin = rq->pageofs_in; - src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin, + src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin, &maptype, may_inplace); if (IS_ERR(src)) return PTR_ERR(src); + out = dst + rq->pageofs_out; /* legacy format could compress extra data in a pcluster. */ if (rq->partial_decoding || !support_0padding) ret = LZ4_decompress_safe_partial(src + inputmargin, out, @@ -266,7 +267,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, vm_unmap_ram(src, ctx->inpages); } else if (maptype == 2) { erofs_put_pcpubuf(src); - } else { + } else if (maptype != 3) { DBG_BUGON(1); return -EFAULT; } @@ -309,7 +310,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, } dstmap_out: - ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out); + ret = z_erofs_lz4_decompress_mem(&ctx, dst); if (!dst_maptype) kunmap_local(dst); else if (dst_maptype == 2) From a14c2431e53afc596651f8cc40ead21fb3d69628 Mon Sep 17 00:00:00 2001 From: Arec Kao Date: Tue, 14 Mar 2023 13:14:36 +0100 Subject: [PATCH 143/187] media: ov13b10: Support device probe in non-zero ACPI D state [ Upstream commit 1af2f618acc1486e3b46cb54cb4891d47bb80c61 ] Tell ACPI device PM code that the driver supports the device being in non-zero ACPI D state when the driver's probe function is entered. Also do identification on the first access of the device, whether in probe or when starting streaming. Signed-off-by: Arec Kao Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Stable-dep-of: 7b0454cfd8ed ("media: ov13b10: Enable runtime PM before registering async sub-device") Signed-off-by: Sasha Levin --- drivers/media/i2c/ov13b10.c | 74 +++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/drivers/media/i2c/ov13b10.c b/drivers/media/i2c/ov13b10.c index 549e5d93e568..722f490f9db4 100644 --- a/drivers/media/i2c/ov13b10.c +++ b/drivers/media/i2c/ov13b10.c @@ -589,6 +589,9 @@ struct ov13b10 { /* Streaming on/off */ bool streaming; + + /* True if the device has been identified */ + bool identified; }; #define to_ov13b10(_sd) container_of(_sd, struct ov13b10, sd) @@ -1023,12 +1026,42 @@ ov13b10_set_pad_format(struct v4l2_subdev *sd, return 0; } +/* Verify chip ID */ +static int ov13b10_identify_module(struct ov13b10 *ov13b) +{ + struct i2c_client *client = v4l2_get_subdevdata(&ov13b->sd); + int ret; + u32 val; + + if (ov13b->identified) + return 0; + + ret = ov13b10_read_reg(ov13b, OV13B10_REG_CHIP_ID, + OV13B10_REG_VALUE_24BIT, &val); + if (ret) + return ret; + + if (val != OV13B10_CHIP_ID) { + dev_err(&client->dev, "chip id mismatch: %x!=%x\n", + OV13B10_CHIP_ID, val); + return -EIO; + } + + ov13b->identified = true; + + return 0; +} + static int ov13b10_start_streaming(struct ov13b10 *ov13b) { struct i2c_client *client = v4l2_get_subdevdata(&ov13b->sd); const struct ov13b10_reg_list *reg_list; int ret, link_freq_index; + ret = ov13b10_identify_module(ov13b); + if (ret) + return ret; + /* Get out of from software reset */ ret = ov13b10_write_reg(ov13b, OV13B10_REG_SOFTWARE_RST, OV13B10_REG_VALUE_08BIT, OV13B10_SOFTWARE_RST); @@ -1144,27 +1177,6 @@ static int __maybe_unused ov13b10_resume(struct device *dev) return ret; } -/* Verify chip ID */ -static int ov13b10_identify_module(struct ov13b10 *ov13b) -{ - struct i2c_client *client = v4l2_get_subdevdata(&ov13b->sd); - int ret; - u32 val; - - ret = ov13b10_read_reg(ov13b, OV13B10_REG_CHIP_ID, - OV13B10_REG_VALUE_24BIT, &val); - if (ret) - return ret; - - if (val != OV13B10_CHIP_ID) { - dev_err(&client->dev, "chip id mismatch: %x!=%x\n", - OV13B10_CHIP_ID, val); - return -EIO; - } - - return 0; -} - static const struct v4l2_subdev_video_ops ov13b10_video_ops = { .s_stream = ov13b10_set_stream, }; @@ -1379,6 +1391,7 @@ static int ov13b10_check_hwcfg(struct device *dev) static int ov13b10_probe(struct i2c_client *client) { struct ov13b10 *ov13b; + bool full_power; int ret; /* Check HW config */ @@ -1395,11 +1408,14 @@ static int ov13b10_probe(struct i2c_client *client) /* Initialize subdev */ v4l2_i2c_subdev_init(&ov13b->sd, client, &ov13b10_subdev_ops); - /* Check module identity */ - ret = ov13b10_identify_module(ov13b); - if (ret) { - dev_err(&client->dev, "failed to find sensor: %d\n", ret); - return ret; + full_power = acpi_dev_state_d0(&client->dev); + if (full_power) { + /* Check module identity */ + ret = ov13b10_identify_module(ov13b); + if (ret) { + dev_err(&client->dev, "failed to find sensor: %d\n", ret); + return ret; + } } /* Set default mode to max resolution */ @@ -1431,7 +1447,10 @@ static int ov13b10_probe(struct i2c_client *client) * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. */ - pm_runtime_set_active(&client->dev); + + /* Set the device's state to active if it's in D0 state. */ + if (full_power) + pm_runtime_set_active(&client->dev); pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); @@ -1480,6 +1499,7 @@ static struct i2c_driver ov13b10_i2c_driver = { }, .probe_new = ov13b10_probe, .remove = ov13b10_remove, + .flags = I2C_DRV_ACPI_WAIVE_D0_PROBE, }; module_i2c_driver(ov13b10_i2c_driver); From 512fc4d735c20836fc26f82bba5359f99de85021 Mon Sep 17 00:00:00 2001 From: Bingbu Cao Date: Wed, 22 Nov 2023 17:46:08 +0800 Subject: [PATCH 144/187] media: ov13b10: Enable runtime PM before registering async sub-device [ Upstream commit 7b0454cfd8edb3509619407c3b9f78a6d0dee1a5 ] As the sensor device maybe accessible right after its async sub-device is registered, such as ipu-bridge will try to power up sensor by sensor's client device's runtime PM from the async notifier callback, if runtime PM is not enabled, it will fail. So runtime PM should be ready before its async sub-device is registered and accessible by others. Fixes: 7ee850546822 ("media: Add sensor driver support for the ov13b10 camera.") Cc: stable@vger.kernel.org Signed-off-by: Bingbu Cao Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/i2c/ov13b10.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/media/i2c/ov13b10.c b/drivers/media/i2c/ov13b10.c index 722f490f9db4..368a3c2bfe34 100644 --- a/drivers/media/i2c/ov13b10.c +++ b/drivers/media/i2c/ov13b10.c @@ -1439,24 +1439,27 @@ static int ov13b10_probe(struct i2c_client *client) goto error_handler_free; } - ret = v4l2_async_register_subdev_sensor(&ov13b->sd); - if (ret < 0) - goto error_media_entity; /* * Device is already turned on by i2c-core with ACPI domain PM. * Enable runtime PM and turn off the device. */ - /* Set the device's state to active if it's in D0 state. */ if (full_power) pm_runtime_set_active(&client->dev); pm_runtime_enable(&client->dev); pm_runtime_idle(&client->dev); + ret = v4l2_async_register_subdev_sensor(&ov13b->sd); + if (ret < 0) + goto error_media_entity_runtime_pm; + return 0; -error_media_entity: +error_media_entity_runtime_pm: + pm_runtime_disable(&client->dev); + if (full_power) + pm_runtime_set_suspended(&client->dev); media_entity_cleanup(&ov13b->sd.entity); error_handler_free: @@ -1476,6 +1479,7 @@ static void ov13b10_remove(struct i2c_client *client) ov13b10_free_controls(ov13b); pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); } static const struct dev_pm_ops ov13b10_pm_ops = { From ea3357c6cfab897ff0ba73e458f4e2948236a489 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 1 Sep 2023 13:05:02 +0530 Subject: [PATCH 145/187] bus: mhi: ep: Do not allocate event ring element on stack [ Upstream commit 987fdb5a43a66764808371b54e6047834170d565 ] It is possible that the host controller driver would use DMA framework to write the event ring element. So avoid allocating event ring element on the stack as DMA cannot work on vmalloc memory. Cc: stable@vger.kernel.org Fixes: 961aeb689224 ("bus: mhi: ep: Add support for sending events to the host") Link: https://lore.kernel.org/r/20230901073502.69385-1-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- drivers/bus/mhi/ep/main.c | 68 ++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/drivers/bus/mhi/ep/main.c b/drivers/bus/mhi/ep/main.c index edd153dda40c..34e0ba6f52d0 100644 --- a/drivers/bus/mhi/ep/main.c +++ b/drivers/bus/mhi/ep/main.c @@ -71,45 +71,77 @@ static int mhi_ep_send_event(struct mhi_ep_cntrl *mhi_cntrl, u32 ring_idx, static int mhi_ep_send_completion_event(struct mhi_ep_cntrl *mhi_cntrl, struct mhi_ep_ring *ring, struct mhi_ring_element *tre, u32 len, enum mhi_ev_ccs code) { - struct mhi_ring_element event = {}; + struct mhi_ring_element *event; + int ret; - event.ptr = cpu_to_le64(ring->rbase + ring->rd_offset * sizeof(*tre)); - event.dword[0] = MHI_TRE_EV_DWORD0(code, len); - event.dword[1] = MHI_TRE_EV_DWORD1(ring->ch_id, MHI_PKT_TYPE_TX_EVENT); + event = kzalloc(sizeof(struct mhi_ring_element), GFP_KERNEL); + if (!event) + return -ENOMEM; - return mhi_ep_send_event(mhi_cntrl, ring->er_index, &event, MHI_TRE_DATA_GET_BEI(tre)); + event->ptr = cpu_to_le64(ring->rbase + ring->rd_offset * sizeof(*tre)); + event->dword[0] = MHI_TRE_EV_DWORD0(code, len); + event->dword[1] = MHI_TRE_EV_DWORD1(ring->ch_id, MHI_PKT_TYPE_TX_EVENT); + + ret = mhi_ep_send_event(mhi_cntrl, ring->er_index, event, MHI_TRE_DATA_GET_BEI(tre)); + kfree(event); + + return ret; } int mhi_ep_send_state_change_event(struct mhi_ep_cntrl *mhi_cntrl, enum mhi_state state) { - struct mhi_ring_element event = {}; + struct mhi_ring_element *event; + int ret; - event.dword[0] = MHI_SC_EV_DWORD0(state); - event.dword[1] = MHI_SC_EV_DWORD1(MHI_PKT_TYPE_STATE_CHANGE_EVENT); + event = kzalloc(sizeof(struct mhi_ring_element), GFP_KERNEL); + if (!event) + return -ENOMEM; - return mhi_ep_send_event(mhi_cntrl, 0, &event, 0); + event->dword[0] = MHI_SC_EV_DWORD0(state); + event->dword[1] = MHI_SC_EV_DWORD1(MHI_PKT_TYPE_STATE_CHANGE_EVENT); + + ret = mhi_ep_send_event(mhi_cntrl, 0, event, 0); + kfree(event); + + return ret; } int mhi_ep_send_ee_event(struct mhi_ep_cntrl *mhi_cntrl, enum mhi_ee_type exec_env) { - struct mhi_ring_element event = {}; + struct mhi_ring_element *event; + int ret; - event.dword[0] = MHI_EE_EV_DWORD0(exec_env); - event.dword[1] = MHI_SC_EV_DWORD1(MHI_PKT_TYPE_EE_EVENT); + event = kzalloc(sizeof(struct mhi_ring_element), GFP_KERNEL); + if (!event) + return -ENOMEM; - return mhi_ep_send_event(mhi_cntrl, 0, &event, 0); + event->dword[0] = MHI_EE_EV_DWORD0(exec_env); + event->dword[1] = MHI_SC_EV_DWORD1(MHI_PKT_TYPE_EE_EVENT); + + ret = mhi_ep_send_event(mhi_cntrl, 0, event, 0); + kfree(event); + + return ret; } static int mhi_ep_send_cmd_comp_event(struct mhi_ep_cntrl *mhi_cntrl, enum mhi_ev_ccs code) { struct mhi_ep_ring *ring = &mhi_cntrl->mhi_cmd->ring; - struct mhi_ring_element event = {}; + struct mhi_ring_element *event; + int ret; - event.ptr = cpu_to_le64(ring->rbase + ring->rd_offset * sizeof(struct mhi_ring_element)); - event.dword[0] = MHI_CC_EV_DWORD0(code); - event.dword[1] = MHI_CC_EV_DWORD1(MHI_PKT_TYPE_CMD_COMPLETION_EVENT); + event = kzalloc(sizeof(struct mhi_ring_element), GFP_KERNEL); + if (!event) + return -ENOMEM; - return mhi_ep_send_event(mhi_cntrl, 0, &event, 0); + event->ptr = cpu_to_le64(ring->rbase + ring->rd_offset * sizeof(struct mhi_ring_element)); + event->dword[0] = MHI_CC_EV_DWORD0(code); + event->dword[1] = MHI_CC_EV_DWORD1(MHI_PKT_TYPE_CMD_COMPLETION_EVENT); + + ret = mhi_ep_send_event(mhi_cntrl, 0, event, 0); + kfree(event); + + return ret; } static int mhi_ep_process_cmd_ring(struct mhi_ep_ring *ring, struct mhi_ring_element *el) From a9dbf8ca3101b5b2efe355bf61b8881bb9b579f3 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Sun, 26 Mar 2023 06:19:35 +0800 Subject: [PATCH 146/187] PM: core: Remove unnecessary (void *) conversions [ Upstream commit 73d73f5ee7fb0c42ff87091d105bee720a9565f1 ] Assignments from pointer variables of type (void *) do not require explicit type casts, so remove such type cases from the code in drivers/base/power/main.c where applicable. Signed-off-by: Li zeming [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Stable-dep-of: 7839d0078e0d ("PM: sleep: Fix possible deadlocks in core system-wide PM code") Signed-off-by: Sasha Levin --- drivers/base/power/main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c50139207794..f85f3515c258 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -679,7 +679,7 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) static void async_resume_noirq(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = device_resume_noirq(dev, pm_transition, true); @@ -816,7 +816,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn static void async_resume_early(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = device_resume_early(dev, pm_transition, true); @@ -980,7 +980,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) static void async_resume(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = device_resume(dev, pm_transition, true); @@ -1269,7 +1269,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a static void async_suspend_noirq(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend_noirq(dev, pm_transition, true); @@ -1450,7 +1450,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as static void async_suspend_late(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend_late(dev, pm_transition, true); @@ -1727,7 +1727,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) static void async_suspend(void *data, async_cookie_t cookie) { - struct device *dev = (struct device *)data; + struct device *dev = data; int error; error = __device_suspend(dev, pm_transition, true); From e1c9d32c98309ae764893a481552d3f99d46cb34 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 27 Dec 2023 21:41:06 +0100 Subject: [PATCH 147/187] PM: sleep: Fix possible deadlocks in core system-wide PM code [ Upstream commit 7839d0078e0d5e6cc2fa0b0dfbee71de74f1e557 ] It is reported that in low-memory situations the system-wide resume core code deadlocks, because async_schedule_dev() executes its argument function synchronously if it cannot allocate memory (and not only in that case) and that function attempts to acquire a mutex that is already held. Executing the argument function synchronously from within dpm_async_fn() may also be problematic for ordering reasons (it may cause a consumer device's resume callback to be invoked before a requisite supplier device's one, for example). Address this by changing the code in question to use async_schedule_dev_nocall() for scheduling the asynchronous execution of device suspend and resume functions and to directly run them synchronously if async_schedule_dev_nocall() returns false. Link: https://lore.kernel.org/linux-pm/ZYvjiqX6EsL15moe@perf/ Reported-by: Youngmin Nam Signed-off-by: Rafael J. Wysocki Reviewed-by: Stanislaw Gruszka Tested-by: Youngmin Nam Reviewed-by: Ulf Hansson Cc: 5.7+ # 5.7+: 6aa09a5bccd8 async: Split async_schedule_node_domain() Cc: 5.7+ # 5.7+: 7d4b5d7a37bd async: Introduce async_schedule_dev_nocall() Cc: 5.7+ # 5.7+ Signed-off-by: Sasha Levin --- drivers/base/power/main.c | 148 ++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 80 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f85f3515c258..9c5a5f4dba5a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -579,7 +579,7 @@ bool dev_pm_skip_resume(struct device *dev) } /** - * device_resume_noirq - Execute a "noirq resume" callback for given device. + * __device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. @@ -587,7 +587,7 @@ bool dev_pm_skip_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) +static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -655,7 +655,13 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn Out: complete_all(&dev->power.completion); TRACE_RESUME(error); - return error; + + if (error) { + suspend_stats.failed_resume_noirq++; + dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); + } } static bool is_async(struct device *dev) @@ -668,11 +674,15 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) { reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(func, dev); + if (!is_async(dev)) + return false; + + get_device(dev); + + if (async_schedule_dev_nocall(func, dev)) return true; - } + + put_device(dev); return false; } @@ -680,15 +690,19 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_noirq(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_noirq(dev, pm_transition, true); put_device(dev); } +static void device_resume_noirq(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_noirq)) + return; + + __device_resume_noirq(dev, pm_transition, false); +} + static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; @@ -698,14 +712,6 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) - dpm_async_fn(dev, async_resume_noirq); - while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); get_device(dev); @@ -713,17 +719,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_noirq(dev, state, false); - if (error) { - suspend_stats.failed_resume_noirq++; - dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " noirq", error); - } - } + device_resume_noirq(dev); put_device(dev); @@ -751,14 +747,14 @@ void dpm_resume_noirq(pm_message_t state) } /** - * device_resume_early - Execute an "early resume" callback for given device. + * __device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ -static int device_resume_early(struct device *dev, pm_message_t state, bool async) +static void __device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -811,21 +807,31 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn pm_runtime_enable(dev); complete_all(&dev->power.completion); - return error; + + if (error) { + suspend_stats.failed_resume_early++; + dpm_save_failed_step(SUSPEND_RESUME_EARLY); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async early" : " early", error); + } } static void async_resume_early(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_early(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_early(dev, pm_transition, true); put_device(dev); } +static void device_resume_early(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_early)) + return; + + __device_resume_early(dev, pm_transition, false); +} + /** * dpm_resume_early - Execute "early resume" callbacks for all devices. * @state: PM transition of the system being carried out. @@ -839,14 +845,6 @@ void dpm_resume_early(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) - dpm_async_fn(dev, async_resume_early); - while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); get_device(dev); @@ -854,17 +852,7 @@ void dpm_resume_early(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_early(dev, state, false); - if (error) { - suspend_stats.failed_resume_early++; - dpm_save_failed_step(SUSPEND_RESUME_EARLY); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " early", error); - } - } + device_resume_early(dev); put_device(dev); @@ -888,12 +876,12 @@ void dpm_resume_start(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_resume_start); /** - * device_resume - Execute "resume" callbacks for given device. + * __device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. */ -static int device_resume(struct device *dev, pm_message_t state, bool async) +static void __device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -975,20 +963,30 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(error); - return error; + if (error) { + suspend_stats.failed_resume++; + dpm_save_failed_step(SUSPEND_RESUME); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async" : "", error); + } } static void async_resume(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - error = device_resume(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume(dev, pm_transition, true); put_device(dev); } +static void device_resume(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume)) + return; + + __device_resume(dev, pm_transition, false); +} + /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. @@ -1008,27 +1006,17 @@ void dpm_resume(pm_message_t state) pm_transition = state; async_error = 0; - list_for_each_entry(dev, &dpm_suspended_list, power.entry) - dpm_async_fn(dev, async_resume); - while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); + get_device(dev); - if (!is_async(dev)) { - int error; - mutex_unlock(&dpm_list_mtx); + mutex_unlock(&dpm_list_mtx); - error = device_resume(dev, state, false); - if (error) { - suspend_stats.failed_resume++; - dpm_save_failed_step(SUSPEND_RESUME); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, "", error); - } + device_resume(dev); + + mutex_lock(&dpm_list_mtx); - mutex_lock(&dpm_list_mtx); - } if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); From a8056e821ccf26ff421b40e8db520794a7cfbb05 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Tue, 2 Jan 2024 20:14:56 -0800 Subject: [PATCH 148/187] thermal: intel: hfi: Refactor enabling code into helper functions [ Upstream commit 8a8b6bb93c704776c4b05cb517c3fa8baffb72f5 ] In preparation for the addition of a suspend notifier, wrap the logic to enable HFI and program its memory buffer into helper functions. Both the CPU hotplug callback and the suspend notifier will use them. This refactoring does not introduce functional changes. Signed-off-by: Ricardo Neri Signed-off-by: Rafael J. Wysocki Stable-dep-of: 97566d09fd02 ("thermal: intel: hfi: Add syscore callbacks for system-wide PM") Signed-off-by: Sasha Levin --- drivers/thermal/intel/intel_hfi.c | 43 ++++++++++++++++--------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index a0640f762dc5..1a014595ed49 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -338,6 +338,26 @@ static void init_hfi_instance(struct hfi_instance *hfi_instance) hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; } +/* Caller must hold hfi_instance_lock. */ +static void hfi_enable(void) +{ + u64 msr_val; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); +} + +static void hfi_set_hw_table(struct hfi_instance *hfi_instance) +{ + phys_addr_t hw_table_pa; + u64 msr_val; + + hw_table_pa = virt_to_phys(hfi_instance->hw_table); + msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); +} + /** * intel_hfi_online() - Enable HFI on @cpu * @cpu: CPU in which the HFI will be enabled @@ -355,8 +375,6 @@ void intel_hfi_online(unsigned int cpu) { struct hfi_instance *hfi_instance; struct hfi_cpu_info *info; - phys_addr_t hw_table_pa; - u64 msr_val; u16 die_id; /* Nothing to do if hfi_instances are missing. */ @@ -400,8 +418,6 @@ void intel_hfi_online(unsigned int cpu) if (!hfi_instance->hw_table) goto unlock; - hw_table_pa = virt_to_phys(hfi_instance->hw_table); - /* * Allocate memory to keep a local copy of the table that * hardware generates. @@ -411,16 +427,6 @@ void intel_hfi_online(unsigned int cpu) if (!hfi_instance->local_table) goto free_hw_table; - /* - * Program the address of the feedback table of this die/package. On - * some processors, hardware remembers the old address of the HFI table - * even after having been reprogrammed and re-enabled. Thus, do not free - * the pages allocated for the table or reprogram the hardware with a - * new base address. Namely, program the hardware only once. - */ - msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; - wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); - init_hfi_instance(hfi_instance); INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); @@ -429,13 +435,8 @@ void intel_hfi_online(unsigned int cpu) cpumask_set_cpu(cpu, hfi_instance->cpus); - /* - * Enable the hardware feedback interface and never disable it. See - * comment on programming the address of the table. - */ - rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); - msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; - wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + hfi_set_hw_table(hfi_instance); + hfi_enable(); unlock: mutex_unlock(&hfi_instance_lock); From b2517d1412444dc8e65c5af6032719c8f6b30363 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Tue, 2 Jan 2024 20:14:58 -0800 Subject: [PATCH 149/187] thermal: intel: hfi: Disable an HFI instance when all its CPUs go offline [ Upstream commit 1c53081d773c2cb4461636559b0d55b46559ceec ] In preparation to support hibernation, add functionality to disable an HFI instance during CPU offline. The last CPU of an instance that goes offline will disable such instance. The Intel Software Development Manual states that the operating system must wait for the hardware to set MSR_IA32_PACKAGE_THERM_STATUS[26] after disabling an HFI instance to ensure that it will no longer write on the HFI memory. Some processors, however, do not ever set such bit. Wait a minimum of 2ms to give time hardware to complete any pending memory writes. Signed-off-by: Ricardo Neri Signed-off-by: Rafael J. Wysocki Stable-dep-of: 97566d09fd02 ("thermal: intel: hfi: Add syscore callbacks for system-wide PM") Signed-off-by: Sasha Levin --- drivers/thermal/intel/intel_hfi.c | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 1a014595ed49..5352fcb72ea3 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -358,6 +359,32 @@ static void hfi_set_hw_table(struct hfi_instance *hfi_instance) wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); } +/* Caller must hold hfi_instance_lock. */ +static void hfi_disable(void) +{ + u64 msr_val; + int i; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + + /* + * Wait for hardware to acknowledge the disabling of HFI. Some + * processors may not do it. Wait for ~2ms. This is a reasonable + * time for hardware to complete any pending actions on the HFI + * memory. + */ + for (i = 0; i < 2000; i++) { + rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); + if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED) + break; + + udelay(1); + cpu_relax(); + } +} + /** * intel_hfi_online() - Enable HFI on @cpu * @cpu: CPU in which the HFI will be enabled @@ -412,6 +439,10 @@ void intel_hfi_online(unsigned int cpu) /* * Hardware is programmed with the physical address of the first page * frame of the table. Hence, the allocated memory must be page-aligned. + * + * Some processors do not forget the initial address of the HFI table + * even after having been reprogrammed. Keep using the same pages. Do + * not free them. */ hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, GFP_KERNEL | __GFP_ZERO); @@ -476,6 +507,10 @@ void intel_hfi_offline(unsigned int cpu) mutex_lock(&hfi_instance_lock); cpumask_clear_cpu(cpu, hfi_instance->cpus); + + if (!cpumask_weight(hfi_instance->cpus)) + hfi_disable(); + mutex_unlock(&hfi_instance_lock); } From 28f010dc50df0f7987c04112114fcfa7e0803566 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Tue, 9 Jan 2024 19:07:04 -0800 Subject: [PATCH 150/187] thermal: intel: hfi: Add syscore callbacks for system-wide PM [ Upstream commit 97566d09fd02d2ab329774bb89a2cdf2267e86d9 ] The kernel allocates a memory buffer and provides its location to the hardware, which uses it to update the HFI table. This allocation occurs during boot and remains constant throughout runtime. When resuming from hibernation, the restore kernel allocates a second memory buffer and reprograms the HFI hardware with the new location as part of a normal boot. The location of the second memory buffer may differ from the one allocated by the image kernel. When the restore kernel transfers control to the image kernel, its HFI buffer becomes invalid, potentially leading to memory corruption if the hardware writes to it (the hardware continues to use the buffer from the restore kernel). It is also possible that the hardware "forgets" the address of the memory buffer when resuming from "deep" suspend. Memory corruption may also occur in such a scenario. To prevent the described memory corruption, disable HFI when preparing to suspend or hibernate. Enable it when resuming. Add syscore callbacks to handle the package of the boot CPU (packages of non-boot CPUs are handled via CPU offline). Syscore ops always run on the boot CPU. Additionally, HFI only needs to be disabled during "deep" suspend and hibernation. Syscore ops only run in these cases. Cc: 6.1+ # 6.1+ Signed-off-by: Ricardo Neri [ rjw: Comment adjustment, subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/thermal/intel/intel_hfi.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 5352fcb72ea3..750dab3f259e 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -35,7 +35,9 @@ #include #include #include +#include #include +#include #include #include @@ -559,6 +561,30 @@ static __init int hfi_parse_features(void) return 0; } +static void hfi_do_enable(void) +{ + /* This code runs only on the boot CPU. */ + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0); + struct hfi_instance *hfi_instance = info->hfi_instance; + + /* No locking needed. There is no concurrency with CPU online. */ + hfi_set_hw_table(hfi_instance); + hfi_enable(); +} + +static int hfi_do_disable(void) +{ + /* No locking needed. There is no concurrency with CPU offline. */ + hfi_disable(); + + return 0; +} + +static struct syscore_ops hfi_pm_ops = { + .resume = hfi_do_enable, + .suspend = hfi_do_disable, +}; + void __init intel_hfi_init(void) { struct hfi_instance *hfi_instance; @@ -590,6 +616,8 @@ void __init intel_hfi_init(void) if (!hfi_updates_wq) goto err_nomem; + register_syscore_ops(&hfi_pm_ops); + return; err_nomem: From 6f5c4aaddd637d3e19a222312ba170ab851437dd Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 21 Sep 2023 09:57:53 +0200 Subject: [PATCH 151/187] fs/pipe: move check to pipe_has_watch_queue() [ Upstream commit b4bd6b4bac8edd61eb8f7b836969d12c0c6af165 ] This declutters the code by reducing the number of #ifdefs and makes the watch_queue checks simpler. This has no runtime effect; the machine code is identical. Signed-off-by: Max Kellermann Message-Id: <20230921075755.1378787-2-max.kellermann@ionos.com> Reviewed-by: David Howells Signed-off-by: Christian Brauner Stable-dep-of: e95aada4cb93 ("pipe: wakeup wr_wait after setting max_usage") Signed-off-by: Sasha Levin --- fs/pipe.c | 12 +++--------- include/linux/pipe_fs_i.h | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 42c7ff41c2db..e8082ffe5171 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -436,12 +436,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) goto out; } -#ifdef CONFIG_WATCH_QUEUE - if (pipe->watch_queue) { + if (pipe_has_watch_queue(pipe)) { ret = -EXDEV; goto out; } -#endif /* * If it wasn't empty we try to merge new data into @@ -1320,10 +1318,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) unsigned int nr_slots, size; long ret = 0; -#ifdef CONFIG_WATCH_QUEUE - if (pipe->watch_queue) + if (pipe_has_watch_queue(pipe)) return -EBUSY; -#endif size = round_pipe_size(arg); nr_slots = size >> PAGE_SHIFT; @@ -1375,10 +1371,8 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) if (file->f_op != &pipefifo_fops || !pipe) return NULL; -#ifdef CONFIG_WATCH_QUEUE - if (for_splice && pipe->watch_queue) + if (for_splice && pipe_has_watch_queue(pipe)) return NULL; -#endif return pipe; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 28b3c6a67397..1f1e7ae95320 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -124,6 +124,22 @@ struct pipe_buf_operations { bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; +/** + * pipe_has_watch_queue - Check whether the pipe is a watch_queue, + * i.e. it was created with O_NOTIFICATION_PIPE + * @pipe: The pipe to check + * + * Return: true if pipe is a watch queue, false otherwise. + */ +static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) +{ +#ifdef CONFIG_WATCH_QUEUE + return pipe->watch_queue != NULL; +#else + return false; +#endif +} + /** * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer From b87a1229d8668fbc78ebd9ca0fc797a76001c60f Mon Sep 17 00:00:00 2001 From: Lukas Schauer Date: Fri, 1 Dec 2023 11:11:28 +0100 Subject: [PATCH 152/187] pipe: wakeup wr_wait after setting max_usage [ Upstream commit e95aada4cb93d42e25c30a0ef9eb2923d9711d4a ] Commit c73be61cede5 ("pipe: Add general notification queue support") a regression was introduced that would lock up resized pipes under certain conditions. See the reproducer in [1]. The commit resizing the pipe ring size was moved to a different function, doing that moved the wakeup for pipe->wr_wait before actually raising pipe->max_usage. If a pipe was full before the resize occured it would result in the wakeup never actually triggering pipe_write. Set @max_usage and @nr_accounted before waking writers if this isn't a watch queue. Link: https://bugzilla.kernel.org/show_bug.cgi?id=212295 [1] Link: https://lore.kernel.org/r/20231201-orchideen-modewelt-e009de4562c6@brauner Fixes: c73be61cede5 ("pipe: Add general notification queue support") Reviewed-by: David Howells Cc: Signed-off-by: Lukas Schauer [Christian Brauner : rewrite to account for watch queues] Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/pipe.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index e8082ffe5171..9873a6030df5 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1301,6 +1301,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) pipe->tail = tail; pipe->head = head; + if (!pipe_has_watch_queue(pipe)) { + pipe->max_usage = nr_slots; + pipe->nr_accounted = nr_slots; + } + spin_unlock_irq(&pipe->rd_wait.lock); /* This might have made more room for writers */ @@ -1352,8 +1357,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) if (ret < 0) goto out_revert_acct; - pipe->max_usage = nr_slots; - pipe->nr_accounted = nr_slots; return pipe->max_usage * PAGE_SIZE; out_revert_acct: From 46cd7ef69fe60baee00b9650c3ba77918cc938de Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 20 Nov 2023 17:43:21 +0100 Subject: [PATCH 153/187] ARM: dts: qcom: sdx55: fix USB wakeup interrupt types [ Upstream commit d0ec3c4c11c3b30e1f2d344973b2a7bf0f986734 ] The DP/DM wakeup interrupts are edge triggered and which edge to trigger on depends on use-case and whether a Low speed or Full/High speed device is connected. Fixes: fea4b41022f3 ("ARM: dts: qcom: sdx55: Add USB3 and PHY support") Cc: stable@vger.kernel.org # 5.12 Cc: Manivannan Sadhasivam Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231120164331.8116-2-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom-sdx55.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom-sdx55.dtsi index a4bf1d5ee206..eb1db01031b2 100644 --- a/arch/arm/boot/dts/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom-sdx55.dtsi @@ -497,8 +497,8 @@ usb: usb@a6f8800 { interrupts = , , - , - ; + , + ; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; From bba1320ef21bd25f9f77b0852524aed35b7d6b00 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 6 Dec 2023 23:15:54 +0100 Subject: [PATCH 154/187] ARM: dts: samsung: exynos4210-i9100: Unconditionally enable LDO12 [ Upstream commit 84228d5e29dbc7a6be51e221000e1d122125826c ] The kernel hangs for a good 12 seconds without any info being printed to dmesg, very early in the boot process, if this regulator is not enabled. Force-enable it to work around this issue, until we know more about the underlying problem. Signed-off-by: Paul Cercueil Fixes: 8620cc2f99b7 ("ARM: dts: exynos: Add devicetree file for the Galaxy S2") Cc: stable@vger.kernel.org # v5.8+ Link: https://lore.kernel.org/r/20231206221556.15348-2-paul@crapouillou.net Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/boot/dts/exynos4210-i9100.dts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/exynos4210-i9100.dts b/arch/arm/boot/dts/exynos4210-i9100.dts index 53e023fc1cac..b4c7b642e456 100644 --- a/arch/arm/boot/dts/exynos4210-i9100.dts +++ b/arch/arm/boot/dts/exynos4210-i9100.dts @@ -521,6 +521,14 @@ vtcam_reg: LDO12 { regulator-name = "VT_CAM_1.8V"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + + /* + * Force-enable this regulator; otherwise the + * kernel hangs very early in the boot process + * for about 12 seconds, without apparent + * reason. + */ + regulator-always-on; }; vcclcd_reg: LDO13 { From 34d2c909c7462b301d95bf9c20344350d8694ede Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Dec 2023 18:31:29 +0100 Subject: [PATCH 155/187] ARM: dts: qcom: sdx55: fix pdc '#interrupt-cells' [ Upstream commit cc25bd06c16aa582596a058d375b2e3133f79b93 ] The Qualcomm PDC interrupt controller binding expects two cells in interrupt specifiers. Fixes: 9d038b2e62de ("ARM: dts: qcom: Add SDX55 platform and MTP board support") Cc: stable@vger.kernel.org # 5.12 Cc: Manivannan Sadhasivam Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20231213173131.29436-2-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom-sdx55.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom-sdx55.dtsi index eb1db01031b2..7bb2f41049d9 100644 --- a/arch/arm/boot/dts/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom-sdx55.dtsi @@ -522,7 +522,7 @@ pdc: interrupt-controller@b210000 { compatible = "qcom,sdx55-pdc", "qcom,pdc"; reg = <0x0b210000 0x30000>; qcom,pdc-ranges = <0 179 52>; - #interrupt-cells = <3>; + #interrupt-cells = <2>; interrupt-parent = <&intc>; interrupt-controller; }; From ecf87621b4971434a0a0db741c640db66e6a149d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Dec 2023 18:31:30 +0100 Subject: [PATCH 156/187] ARM: dts: qcom: sdx55: fix USB DP/DM HS PHY interrupts [ Upstream commit de95f139394a5ed82270f005bc441d2e7c1e51b7 ] The USB DP/DM HS PHY interrupts need to be provided by the PDC interrupt controller in order to be able to wake the system up from low-power states and to be able to detect disconnect events, which requires triggering on falling edges. A recent commit updated the trigger type but failed to change the interrupt provider as required. This leads to the current Linux driver failing to probe instead of printing an error during suspend and USB wakeup not working as intended. Fixes: d0ec3c4c11c3 ("ARM: dts: qcom: sdx55: fix USB wakeup interrupt types") Fixes: fea4b41022f3 ("ARM: dts: qcom: sdx55: Add USB3 and PHY support") Cc: stable@vger.kernel.org # 5.12 Cc: Manivannan Sadhasivam Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20231213173131.29436-3-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom-sdx55.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom-sdx55.dtsi index 7bb2f41049d9..de747859fdc6 100644 --- a/arch/arm/boot/dts/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom-sdx55.dtsi @@ -495,10 +495,10 @@ usb: usb@a6f8800 { <&gcc GCC_USB30_MASTER_CLK>; assigned-clock-rates = <19200000>, <200000000>; - interrupts = , - , - , - ; + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 11 IRQ_TYPE_EDGE_BOTH>, + <&pdc 10 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; From fd7c2ffa0ea4941adf7b6af068c66bb21bbb7ab5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Dec 2023 18:31:31 +0100 Subject: [PATCH 157/187] ARM: dts: qcom: sdx55: fix USB SS wakeup [ Upstream commit 710dd03464e4ab5b3d329768388b165d61958577 ] The USB SS PHY interrupt needs to be provided by the PDC interrupt controller in order to be able to wake the system up from low-power states. Fixes: fea4b41022f3 ("ARM: dts: qcom: sdx55: Add USB3 and PHY support") Cc: stable@vger.kernel.org # 5.12 Cc: Manivannan Sadhasivam Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20231213173131.29436-4-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom-sdx55.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom-sdx55.dtsi index de747859fdc6..b85820448b9d 100644 --- a/arch/arm/boot/dts/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom-sdx55.dtsi @@ -496,7 +496,7 @@ usb: usb@a6f8800 { assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 198 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 51 IRQ_TYPE_LEVEL_HIGH>, <&pdc 11 IRQ_TYPE_EDGE_BOTH>, <&pdc 10 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", From e11dea8f503341507018b60906c4a9e7332f3663 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Mon, 6 Nov 2023 15:24:38 -0600 Subject: [PATCH 158/187] dlm: use kernel_connect() and kernel_bind() [ Upstream commit e9cdebbe23f1aa9a1caea169862f479ab3fa2773 ] Recent changes to kernel_connect() and kernel_bind() ensure that callers are insulated from changes to the address parameter made by BPF SOCK_ADDR hooks. This patch wraps direct calls to ops->connect() and ops->bind() with kernel_connect() and kernel_bind() to protect callers in such cases. Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.camel@redhat.com/ Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect") Fixes: 4fbac77d2d09 ("bpf: Hooks for sys_bind") Cc: stable@vger.kernel.org Signed-off-by: Jordan Rife Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/lowcomms.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 6ed09edabea0..72f34f96d015 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1845,8 +1845,8 @@ static int dlm_tcp_bind(struct socket *sock) memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); make_sockaddr(&src_addr, 0, &addr_len); - result = sock->ops->bind(sock, (struct sockaddr *)&src_addr, - addr_len); + result = kernel_bind(sock, (struct sockaddr *)&src_addr, + addr_len); if (result < 0) { /* This *may* not indicate a critical error */ log_print("could not bind for connect: %d", result); @@ -1860,7 +1860,7 @@ static int dlm_tcp_connect(struct connection *con, struct socket *sock, { int ret; - ret = sock->ops->connect(sock, addr, addr_len, O_NONBLOCK); + ret = kernel_connect(sock, addr, addr_len, O_NONBLOCK); switch (ret) { case -EINPROGRESS: fallthrough; @@ -1900,8 +1900,8 @@ static int dlm_tcp_listen_bind(struct socket *sock) /* Bind to our port */ make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); - return sock->ops->bind(sock, (struct sockaddr *)dlm_local_addr[0], - addr_len); + return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0], + addr_len); } static const struct dlm_proto_ops dlm_tcp_ops = { @@ -1928,12 +1928,12 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock, int ret; /* - * Make sock->ops->connect() function return in specified time, + * Make kernel_connect() function return in specified time, * since O_NONBLOCK argument in connect() function does not work here, * then, we should restore the default value of this attribute. */ sock_set_sndtimeo(sock->sk, 5); - ret = sock->ops->connect(sock, addr, addr_len, 0); + ret = kernel_connect(sock, addr, addr_len, 0); sock_set_sndtimeo(sock->sk, 0); if (ret < 0) return ret; From 4fd9a021214208f9667ca9ed461713ea2e2e04f1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Sep 2023 20:43:18 +0206 Subject: [PATCH 159/187] serial: core: Provide port lock wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b0af4bcb49464c221ad5f95d40f2b1b252ceedcc ] When a serial port is used for kernel console output, then all modifications to the UART registers which are done from other contexts, e.g. getty, termios, are interference points for the kernel console. So far this has been ignored and the printk output is based on the principle of hope. The rework of the console infrastructure which aims to support threaded and atomic consoles, requires to mark sections which modify the UART registers as unsafe. This allows the atomic write function to make informed decisions and eventually to restore operational state. It also allows to prevent the regular UART code from modifying UART registers while printk output is in progress. All modifications of UART registers are guarded by the UART port lock, which provides an obvious synchronization point with the console infrastructure. Provide wrapper functions for spin_[un]lock*(port->lock) invocations so that the console mechanics can be applied later on at a single place and does not require to copy the same logic all over the drivers. Signed-off-by: Thomas Gleixner Reviewed-by: Ilpo Järvinen Signed-off-by: John Ogness Link: https://lore.kernel.org/r/20230914183831.587273-2-john.ogness@linutronix.de Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 9915753037eb ("serial: sc16is7xx: fix unconditional activation of THRI interrupt") Signed-off-by: Sasha Levin --- include/linux/serial_core.h | 79 +++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d657f2a42a7b..6055dbe5c12e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -583,6 +583,85 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/** + * uart_port_lock - Lock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock(struct uart_port *up) +{ + spin_lock(&up->lock); +} + +/** + * uart_port_lock_irq - Lock the UART port and disable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock_irq(struct uart_port *up) +{ + spin_lock_irq(&up->lock); +} + +/** + * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + */ +static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_trylock - Try to lock the UART port + * @up: Pointer to UART port structure + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock(struct uart_port *up) +{ + return spin_trylock(&up->lock); +} + +/** + * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) +{ + return spin_trylock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_unlock - Unlock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock(struct uart_port *up) +{ + spin_unlock(&up->lock); +} + +/** + * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock_irq(struct uart_port *up) +{ + spin_unlock_irq(&up->lock); +} + +/** + * uart_port_lock_irqrestore - Unlock the UART port, restore interrupts + * @up: Pointer to UART port structure + * @flags: The saved interrupt flags for restore + */ +static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); From 49d733c4bbeeba0bb523d8b947ae717fdc70347c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Sep 2023 20:44:12 +0206 Subject: [PATCH 160/187] serial: sc16is7xx: Use port lock wrappers [ Upstream commit b465848be8a652e2c5fefe102661fb660cff8497 ] When a serial port is used for kernel console output, then all modifications to the UART registers which are done from other contexts, e.g. getty, termios, are interference points for the kernel console. So far this has been ignored and the printk output is based on the principle of hope. The rework of the console infrastructure which aims to support threaded and atomic consoles, requires to mark sections which modify the UART registers as unsafe. This allows the atomic write function to make informed decisions and eventually to restore operational state. It also allows to prevent the regular UART code from modifying UART registers while printk output is in progress. All modifications of UART registers are guarded by the UART port lock, which provides an obvious synchronization point with the console infrastructure. To avoid adding this functionality to all UART drivers, wrap the spin_[un]lock*() invocations for uart_port::lock into helper functions which just contain the spin_[un]lock*() invocations for now. In a subsequent step these helpers will gain the console synchronization mechanisms. Converted with coccinelle. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: John Ogness Link: https://lore.kernel.org/r/20230914183831.587273-56-john.ogness@linutronix.de Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 9915753037eb ("serial: sc16is7xx: fix unconditional activation of THRI interrupt") Signed-off-by: Sasha Levin --- drivers/tty/serial/sc16is7xx.c | 40 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index ab9159441d02..4def1b7266f6 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -641,9 +641,9 @@ static void sc16is7xx_handle_tx(struct uart_port *port) } if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_stop_tx(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return; } @@ -672,13 +672,13 @@ static void sc16is7xx_handle_tx(struct uart_port *port) sc16is7xx_fifo_write(port, to_send); } - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); if (uart_circ_empty(xmit)) sc16is7xx_stop_tx(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static unsigned int sc16is7xx_get_hwmctrl(struct uart_port *port) @@ -709,7 +709,7 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) one->old_mctrl = status; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); if ((changed & TIOCM_RNG) && (status & TIOCM_RNG)) port->icount.rng++; if (changed & TIOCM_DSR) @@ -720,7 +720,7 @@ static void sc16is7xx_update_mlines(struct sc16is7xx_one *one) uart_handle_cts_change(port, status & TIOCM_CTS); wake_up_interruptible(&port->state->port.delta_msr_wait); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) @@ -814,9 +814,9 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws) sc16is7xx_handle_tx(port); mutex_unlock(&one->efr_lock); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static void sc16is7xx_reconf_rs485(struct uart_port *port) @@ -827,14 +827,14 @@ static void sc16is7xx_reconf_rs485(struct uart_port *port) struct serial_rs485 *rs485 = &port->rs485; unsigned long irqflags; - spin_lock_irqsave(&port->lock, irqflags); + uart_port_lock_irqsave(port, &irqflags); if (rs485->flags & SER_RS485_ENABLED) { efcr |= SC16IS7XX_EFCR_AUTO_RS485_BIT; if (rs485->flags & SER_RS485_RTS_AFTER_SEND) efcr |= SC16IS7XX_EFCR_RTS_INVERT_BIT; } - spin_unlock_irqrestore(&port->lock, irqflags); + uart_port_unlock_irqrestore(port, irqflags); sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG, mask, efcr); } @@ -845,10 +845,10 @@ static void sc16is7xx_reg_proc(struct kthread_work *ws) struct sc16is7xx_one_config config; unsigned long irqflags; - spin_lock_irqsave(&one->port.lock, irqflags); + uart_port_lock_irqsave(&one->port, &irqflags); config = one->config; memset(&one->config, 0, sizeof(one->config)); - spin_unlock_irqrestore(&one->port.lock, irqflags); + uart_port_unlock_irqrestore(&one->port, irqflags); if (config.flags & SC16IS7XX_RECONF_MD) { u8 mcr = 0; @@ -954,18 +954,18 @@ static void sc16is7xx_throttle(struct uart_port *port) * value set in MCR register. Stop reading data from RX FIFO so the * AutoRTS feature will de-activate RTS output. */ - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_ier_clear(port, SC16IS7XX_IER_RDI_BIT); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static void sc16is7xx_unthrottle(struct uart_port *port) { unsigned long flags; - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_ier_set(port, SC16IS7XX_IER_RDI_BIT); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static unsigned int sc16is7xx_tx_empty(struct uart_port *port) @@ -1103,7 +1103,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, /* Setup baudrate generator */ baud = sc16is7xx_set_baud(port, baud); - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); /* Update timeout according to new baud rate */ uart_update_timeout(port, termios->c_cflag, baud); @@ -1111,7 +1111,7 @@ static void sc16is7xx_set_termios(struct uart_port *port, if (UART_ENABLE_MS(port, termios->c_cflag)) sc16is7xx_enable_ms(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); } static int sc16is7xx_config_rs485(struct uart_port *port, struct ktermios *termios, @@ -1197,9 +1197,9 @@ static int sc16is7xx_startup(struct uart_port *port) sc16is7xx_port_write(port, SC16IS7XX_IER_REG, val); /* Enable modem status polling */ - spin_lock_irqsave(&port->lock, flags); + uart_port_lock_irqsave(port, &flags); sc16is7xx_enable_ms(port); - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return 0; } From b168029d67e95ada58023b7d6f2e0630c044ffd6 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 11 Dec 2023 12:13:53 -0500 Subject: [PATCH 161/187] serial: sc16is7xx: fix unconditional activation of THRI interrupt [ Upstream commit 9915753037eba7135b209fef4f2afeca841af816 ] Commit cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop") changed behavior to unconditionnaly set the THRI interrupt in sc16is7xx_tx_proc(). For example when sending a 65 bytes message, and assuming the Tx FIFO is initially empty, sc16is7xx_handle_tx() will write the first 64 bytes of the message to the FIFO and sc16is7xx_tx_proc() will then activate THRI. When the THRI IRQ is fired, the driver will write the remaining byte of the message to the FIFO, and disable THRI by calling sc16is7xx_stop_tx(). When sending a 2 bytes message, sc16is7xx_handle_tx() will write the 2 bytes of the message to the FIFO and call sc16is7xx_stop_tx(), disabling THRI. After sc16is7xx_handle_tx() exits, control returns to sc16is7xx_tx_proc() which will unconditionally set THRI. When the THRI IRQ is fired, the driver simply acknowledges the interrupt and does nothing more, since all the data has already been written to the FIFO. This results in 2 register writes and 4 register reads all for nothing and taking precious cycles from the I2C/SPI bus. Fix this by enabling the THRI interrupt only when we fill the Tx FIFO to its maximum capacity and there are remaining bytes to send in the message. Fixes: cc4c1d05eb10 ("sc16is7xx: Properly resume TX after stop") Cc: Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20231211171353.2901416-7-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/sc16is7xx.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 4def1b7266f6..e331b57d6d7d 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -678,6 +678,8 @@ static void sc16is7xx_handle_tx(struct uart_port *port) if (uart_circ_empty(xmit)) sc16is7xx_stop_tx(port); + else + sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); uart_port_unlock_irqrestore(port, flags); } @@ -804,7 +806,6 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws) { struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port); struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); - unsigned long flags; if ((port->rs485.flags & SER_RS485_ENABLED) && (port->rs485.delay_rts_before_send > 0)) @@ -813,10 +814,6 @@ static void sc16is7xx_tx_proc(struct kthread_work *ws) mutex_lock(&one->efr_lock); sc16is7xx_handle_tx(port); mutex_unlock(&one->efr_lock); - - uart_port_lock_irqsave(port, &flags); - sc16is7xx_ier_set(port, SC16IS7XX_IER_THRI_BIT); - uart_port_unlock_irqrestore(port, flags); } static void sc16is7xx_reconf_rs485(struct uart_port *port) From 4c45143447e6275cf101b2a81e5b7ff576d32caf Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 19 Dec 2023 01:02:28 +0900 Subject: [PATCH 162/187] btrfs: zoned: factor out prepare_allocation_zoned() [ Upstream commit b271fee9a41ca1474d30639fd6cc912c9901d0f8 ] Factor out prepare_allocation_zoned() for further extension. While at it, optimize the if-branch a bit. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba Stable-dep-of: 02444f2ac26e ("btrfs: zoned: optimize hint byte for zoned allocator") Signed-off-by: Sasha Levin --- fs/btrfs/extent-tree.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7be3fcfb3f97..2832abe3a984 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4207,6 +4207,24 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, return 0; } +static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info, + struct find_free_extent_ctl *ffe_ctl) +{ + if (ffe_ctl->for_treelog) { + spin_lock(&fs_info->treelog_bg_lock); + if (fs_info->treelog_bg) + ffe_ctl->hint_byte = fs_info->treelog_bg; + spin_unlock(&fs_info->treelog_bg_lock); + } else if (ffe_ctl->for_data_reloc) { + spin_lock(&fs_info->relocation_bg_lock); + if (fs_info->data_reloc_bg) + ffe_ctl->hint_byte = fs_info->data_reloc_bg; + spin_unlock(&fs_info->relocation_bg_lock); + } + + return 0; +} + static int prepare_allocation(struct btrfs_fs_info *fs_info, struct find_free_extent_ctl *ffe_ctl, struct btrfs_space_info *space_info, @@ -4217,19 +4235,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, return prepare_allocation_clustered(fs_info, ffe_ctl, space_info, ins); case BTRFS_EXTENT_ALLOC_ZONED: - if (ffe_ctl->for_treelog) { - spin_lock(&fs_info->treelog_bg_lock); - if (fs_info->treelog_bg) - ffe_ctl->hint_byte = fs_info->treelog_bg; - spin_unlock(&fs_info->treelog_bg_lock); - } - if (ffe_ctl->for_data_reloc) { - spin_lock(&fs_info->relocation_bg_lock); - if (fs_info->data_reloc_bg) - ffe_ctl->hint_byte = fs_info->data_reloc_bg; - spin_unlock(&fs_info->relocation_bg_lock); - } - return 0; + return prepare_allocation_zoned(fs_info, ffe_ctl); default: BUG(); } From f91c77d2c363aaefdbb12469bae7305093d9f49b Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 19 Dec 2023 01:02:29 +0900 Subject: [PATCH 163/187] btrfs: zoned: optimize hint byte for zoned allocator [ Upstream commit 02444f2ac26eae6385a65fcd66915084d15dffba ] Writing sequentially to a huge file on btrfs on a SMR HDD revealed a decline of the performance (220 MiB/s to 30 MiB/s after 500 minutes). The performance goes down because of increased latency of the extent allocation, which is induced by a traversing of a lot of full block groups. So, this patch optimizes the ffe_ctl->hint_byte by choosing a block group with sufficient size from the active block group list, which does not contain full block groups. After applying the patch, the performance is maintained well. Fixes: 2eda57089ea3 ("btrfs: zoned: implement sequential extent allocation") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent-tree.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2832abe3a984..0d2cc186974d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4220,6 +4220,24 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info, if (fs_info->data_reloc_bg) ffe_ctl->hint_byte = fs_info->data_reloc_bg; spin_unlock(&fs_info->relocation_bg_lock); + } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) { + struct btrfs_block_group *block_group; + + spin_lock(&fs_info->zone_active_bgs_lock); + list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { + /* + * No lock is OK here because avail is monotinically + * decreasing, and this is just a hint. + */ + u64 avail = block_group->zone_capacity - block_group->alloc_offset; + + if (block_group_bits(block_group, ffe_ctl->flags) && + avail >= ffe_ctl->num_bytes) { + ffe_ctl->hint_byte = block_group->start; + break; + } + } + spin_unlock(&fs_info->zone_active_bgs_lock); } return 0; From ec5e692cbad49a2a31b49b1d153e41f31bb74a37 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Tue, 7 Nov 2023 12:41:51 -0800 Subject: [PATCH 164/187] drm/panel-edp: drm/panel-edp: Fix AUO B116XAK01 name and timing [ Upstream commit fc6e7679296530106ee0954e8ddef1aa58b2e0b5 ] Rename AUO 0x405c B116XAK01 to B116XAK01.0 and adjust the timing of auo_b116xak01: T3=200, T12=500, T7_max = 50 according to decoding edid and datasheet. Fixes: da458286a5e2 ("drm/panel: Add support for AUO B116XAK01 panel") Cc: stable@vger.kernel.org Signed-off-by: Hsin-Yi Wang Reviewed-by: Douglas Anderson Acked-by: Maxime Ripard Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231107204611.3082200-2-hsinyi@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-edp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index a163585a2a52..ecd6238749f7 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -975,6 +975,8 @@ static const struct panel_desc auo_b116xak01 = { }, .delay = { .hpd_absent = 200, + .unprepare = 500, + .enable = 50, }, }; @@ -1870,7 +1872,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('A', 'U', 'O', 0x1062, &delay_200_500_e50, "B120XAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1e9b, &delay_200_500_e50, "B133UAN02.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1ea5, &delay_200_500_e50, "B116XAK01.6"), - EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"), From f9a4c401bf4c5af3437ad221c0a5880a518068d4 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 28 Jan 2024 16:20:40 -0500 Subject: [PATCH 165/187] Revert "powerpc/64s: Increase default stack size to 32KB" This reverts commit 9ccf64e763aca088b0d25c1274af42b1a6a45135 which is upstream commit 18f14afe281648e31ed35c9ad2fcb724c4838ad9. Breaks the build: arch/powerpc/kvm/book3s_hv_rmhandlers.S:2689: Error: operand out of range (0x0000000000008310 is not between 0xffffffffffff8001 and 0x0000000000008000) make[3]: *** [scripts/Makefile.build:382: arch/powerpc/kvm/book3s_hv_rmhandlers.o] Error 1 Signed-off-by: Sasha Levin --- arch/powerpc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2c94f9cf1ce0..6050e6e10d32 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -806,7 +806,6 @@ config THREAD_SHIFT int "Thread shift" if EXPERT range 13 15 default "15" if PPC_256K_PAGES - default "15" if PPC_PSERIES || PPC_POWERNV default "14" if PPC64 default "13" help From 6b7fb2903aec8d00a54a533e1f4d46da9b801a43 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 21 Dec 2023 13:55:48 -0800 Subject: [PATCH 166/187] drm/bridge: parade-ps8640: Wait for HPD when doing an AUX transfer [ Upstream commit 024b32db43a359e0ded3fcc6cd86247cbbed4224 ] Unlike what is claimed in commit f5aa7d46b0ee ("drm/bridge: parade-ps8640: Provide wait_hpd_asserted() in struct drm_dp_aux"), if someone manually tries to do an AUX transfer (like via `i2cdump ${bus} 0x50 i`) while the panel is off we don't just get a simple transfer error. Instead, the whole ps8640 gets thrown for a loop and goes into a bad state. Let's put the function to wait for the HPD (and the magical 50 ms after first reset) back in when we're doing an AUX transfer. This shouldn't actually make things much slower (assuming the panel is on) because we should immediately poll and see the HPD high. Mostly this is just an extra i2c transfer to the bridge. Fixes: f5aa7d46b0ee ("drm/bridge: parade-ps8640: Provide wait_hpd_asserted() in struct drm_dp_aux") Tested-by: Pin-yen Lin Reviewed-by: Pin-yen Lin Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231221135548.1.I10f326a9305d57ad32cee7f8d9c60518c8be20fb@changeid Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/parade-ps8640.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 083337a27966..146e1ad76223 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -354,6 +354,11 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, int ret; pm_runtime_get_sync(dev); + ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); + if (ret) { + pm_runtime_put_sync_suspend(dev); + return ret; + } ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); From ed555f5b5cc9f9e83a405e82c51d9b60a78ee0e8 Mon Sep 17 00:00:00 2001 From: Markus Niebel Date: Thu, 12 Oct 2023 10:42:08 +0200 Subject: [PATCH 167/187] drm: panel-simple: add missing bus flags for Tianma tm070jvhg[30/33] [ Upstream commit 45dd7df26cee741b31c25ffdd44fb8794eb45ccd ] The DE signal is active high on this display, fill in the missing bus_flags. This aligns panel_desc with its display_timing. Fixes: 9a2654c0f62a ("drm/panel: Add and fill drm_panel type field") Fixes: b3bfcdf8a3b6 ("drm/panel: simple: add Tianma TM070JVHG33") Signed-off-by: Markus Niebel Signed-off-by: Alexander Stein Reviewed-by: Sam Ravnborg Link: https://lore.kernel.org/r/20231012084208.2731650-1-alexander.stein@ew.tq-group.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231012084208.2731650-1-alexander.stein@ew.tq-group.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 005377f58eb4..b714ee1bcbaa 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3603,6 +3603,7 @@ static const struct panel_desc tianma_tm070jdhg30 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct panel_desc tianma_tm070jvhg33 = { @@ -3615,6 +3616,7 @@ static const struct panel_desc tianma_tm070jvhg33 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct display_timing tianma_tm070rvhg71_timing = { From c46f9c7f93f694d397714af3b866eb5f981be6af Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 30 Nov 2022 11:22:37 +0200 Subject: [PATCH 168/187] drm/bridge: sii902x: Use devm_regulator_bulk_get_enable() [ Upstream commit ff1eae1201a46f997126297d2d3440baa2d1b9a9 ] Simplify using devm_regulator_bulk_get_enable() Signed-off-by: Matti Vaittinen Acked-by: Robert Foss Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/e6153c7beb2076b9ea13082b2024ec3296bc08bc.1669799805.git.mazziesaccount@gmail.com Stable-dep-of: 08ac6f132dd7 ("drm/bridge: sii902x: Fix probing race issue") Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/sii902x.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 878fb7d3732b..f6e8b401069b 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -171,7 +171,6 @@ struct sii902x { struct drm_connector connector; struct gpio_desc *reset_gpio; struct i2c_mux_core *i2cmux; - struct regulator_bulk_data supplies[2]; bool sink_is_hdmi; /* * Mutex protects audio and video functions from interfering @@ -1072,6 +1071,7 @@ static int sii902x_probe(struct i2c_client *client, struct device *dev = &client->dev; struct device_node *endpoint; struct sii902x *sii902x; + static const char * const supplies[] = {"iovcc", "cvcc12"}; int ret; ret = i2c_check_functionality(client->adapter, @@ -1122,27 +1122,11 @@ static int sii902x_probe(struct i2c_client *client, mutex_init(&sii902x->mutex); - sii902x->supplies[0].supply = "iovcc"; - sii902x->supplies[1].supply = "cvcc12"; - ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(sii902x->supplies), - sii902x->supplies); + ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(supplies), supplies); if (ret < 0) - return ret; + return dev_err_probe(dev, ret, "Failed to enable supplies"); - ret = regulator_bulk_enable(ARRAY_SIZE(sii902x->supplies), - sii902x->supplies); - if (ret < 0) { - dev_err_probe(dev, ret, "Failed to enable supplies"); - return ret; - } - - ret = sii902x_init(sii902x); - if (ret < 0) { - regulator_bulk_disable(ARRAY_SIZE(sii902x->supplies), - sii902x->supplies); - } - - return ret; + return sii902x_init(sii902x); } static void sii902x_remove(struct i2c_client *client) @@ -1152,8 +1136,6 @@ static void sii902x_remove(struct i2c_client *client) i2c_mux_del_adapters(sii902x->i2cmux); drm_bridge_remove(&sii902x->bridge); - regulator_bulk_disable(ARRAY_SIZE(sii902x->supplies), - sii902x->supplies); } static const struct of_device_id sii902x_dt_ids[] = { From e0f83c234ea7a3dec1f84e5d02caa1c51664a076 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 3 Jan 2024 15:31:07 +0200 Subject: [PATCH 169/187] drm/bridge: sii902x: Fix probing race issue [ Upstream commit 08ac6f132dd77e40f786d8af51140c96c6d739c9 ] A null pointer dereference crash has been observed rarely on TI platforms using sii9022 bridge: [ 53.271356] sii902x_get_edid+0x34/0x70 [sii902x] [ 53.276066] sii902x_bridge_get_edid+0x14/0x20 [sii902x] [ 53.281381] drm_bridge_get_edid+0x20/0x34 [drm] [ 53.286305] drm_bridge_connector_get_modes+0x8c/0xcc [drm_kms_helper] [ 53.292955] drm_helper_probe_single_connector_modes+0x190/0x538 [drm_kms_helper] [ 53.300510] drm_client_modeset_probe+0x1f0/0xbd4 [drm] [ 53.305958] __drm_fb_helper_initial_config_and_unlock+0x50/0x510 [drm_kms_helper] [ 53.313611] drm_fb_helper_initial_config+0x48/0x58 [drm_kms_helper] [ 53.320039] drm_fbdev_dma_client_hotplug+0x84/0xd4 [drm_dma_helper] [ 53.326401] drm_client_register+0x5c/0xa0 [drm] [ 53.331216] drm_fbdev_dma_setup+0xc8/0x13c [drm_dma_helper] [ 53.336881] tidss_probe+0x128/0x264 [tidss] [ 53.341174] platform_probe+0x68/0xc4 [ 53.344841] really_probe+0x188/0x3c4 [ 53.348501] __driver_probe_device+0x7c/0x16c [ 53.352854] driver_probe_device+0x3c/0x10c [ 53.357033] __device_attach_driver+0xbc/0x158 [ 53.361472] bus_for_each_drv+0x88/0xe8 [ 53.365303] __device_attach+0xa0/0x1b4 [ 53.369135] device_initial_probe+0x14/0x20 [ 53.373314] bus_probe_device+0xb0/0xb4 [ 53.377145] deferred_probe_work_func+0xcc/0x124 [ 53.381757] process_one_work+0x1f0/0x518 [ 53.385770] worker_thread+0x1e8/0x3dc [ 53.389519] kthread+0x11c/0x120 [ 53.392750] ret_from_fork+0x10/0x20 The issue here is as follows: - tidss probes, but is deferred as sii902x is still missing. - sii902x starts probing and enters sii902x_init(). - sii902x calls drm_bridge_add(). Now the sii902x bridge is ready from DRM's perspective. - sii902x calls sii902x_audio_codec_init() and platform_device_register_data() - The registration of the audio platform device causes probing of the deferred devices. - tidss probes, which eventually causes sii902x_bridge_get_edid() to be called. - sii902x_bridge_get_edid() tries to use the i2c to read the edid. However, the sii902x driver has not set up the i2c part yet, leading to the crash. Fix this by moving the drm_bridge_add() to the end of the sii902x_init(), which is also at the very end of sii902x_probe(). Signed-off-by: Tomi Valkeinen Fixes: 21d808405fe4 ("drm/bridge/sii902x: Fix EDID readback") Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20240103-si902x-fixes-v1-1-b9fd3e448411@ideasonboard.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240103-si902x-fixes-v1-1-b9fd3e448411@ideasonboard.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/sii902x.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index f6e8b401069b..f648d686eb37 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -1040,16 +1040,6 @@ static int sii902x_init(struct sii902x *sii902x) return ret; } - sii902x->bridge.funcs = &sii902x_bridge_funcs; - sii902x->bridge.of_node = dev->of_node; - sii902x->bridge.timings = &default_sii902x_timings; - sii902x->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; - - if (sii902x->i2c->irq > 0) - sii902x->bridge.ops |= DRM_BRIDGE_OP_HPD; - - drm_bridge_add(&sii902x->bridge); - sii902x_audio_codec_init(sii902x, dev); i2c_set_clientdata(sii902x->i2c, sii902x); @@ -1062,7 +1052,21 @@ static int sii902x_init(struct sii902x *sii902x) return -ENOMEM; sii902x->i2cmux->priv = sii902x; - return i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + if (ret) + return ret; + + sii902x->bridge.funcs = &sii902x_bridge_funcs; + sii902x->bridge.of_node = dev->of_node; + sii902x->bridge.timings = &default_sii902x_timings; + sii902x->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; + + if (sii902x->i2c->irq > 0) + sii902x->bridge.ops |= DRM_BRIDGE_OP_HPD; + + drm_bridge_add(&sii902x->bridge); + + return 0; } static int sii902x_probe(struct i2c_client *client, @@ -1130,12 +1134,11 @@ static int sii902x_probe(struct i2c_client *client, } static void sii902x_remove(struct i2c_client *client) - { struct sii902x *sii902x = i2c_get_clientdata(client); - i2c_mux_del_adapters(sii902x->i2cmux); drm_bridge_remove(&sii902x->bridge); + i2c_mux_del_adapters(sii902x->i2cmux); } static const struct of_device_id sii902x_dt_ids[] = { From 279f1cc5626156fc344a645cbc3cba2ec8ecba66 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 3 Jan 2024 15:31:08 +0200 Subject: [PATCH 170/187] drm/bridge: sii902x: Fix audio codec unregistration [ Upstream commit 3fc6c76a8d208d3955c9e64b382d0ff370bc61fc ] The driver never unregisters the audio codec platform device, which can lead to a crash on module reloading, nor does it handle the return value from sii902x_audio_codec_init(). Signed-off-by: Tomi Valkeinen Fixes: ff5781634c41 ("drm/bridge: sii902x: Implement HDMI audio support") Cc: Jyri Sarha Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20240103-si902x-fixes-v1-2-b9fd3e448411@ideasonboard.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240103-si902x-fixes-v1-2-b9fd3e448411@ideasonboard.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/sii902x.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index f648d686eb37..6359d6f53a1b 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -1040,7 +1040,9 @@ static int sii902x_init(struct sii902x *sii902x) return ret; } - sii902x_audio_codec_init(sii902x, dev); + ret = sii902x_audio_codec_init(sii902x, dev); + if (ret) + return ret; i2c_set_clientdata(sii902x->i2c, sii902x); @@ -1048,13 +1050,15 @@ static int sii902x_init(struct sii902x *sii902x) 1, 0, I2C_MUX_GATE, sii902x_i2c_bypass_select, sii902x_i2c_bypass_deselect); - if (!sii902x->i2cmux) - return -ENOMEM; + if (!sii902x->i2cmux) { + ret = -ENOMEM; + goto err_unreg_audio; + } sii902x->i2cmux->priv = sii902x; ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); if (ret) - return ret; + goto err_unreg_audio; sii902x->bridge.funcs = &sii902x_bridge_funcs; sii902x->bridge.of_node = dev->of_node; @@ -1067,6 +1071,12 @@ static int sii902x_init(struct sii902x *sii902x) drm_bridge_add(&sii902x->bridge); return 0; + +err_unreg_audio: + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); + + return ret; } static int sii902x_probe(struct i2c_client *client, @@ -1139,6 +1149,9 @@ static void sii902x_remove(struct i2c_client *client) drm_bridge_remove(&sii902x->bridge); i2c_mux_del_adapters(sii902x->i2cmux); + + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); } static const struct of_device_id sii902x_dt_ids[] = { From 6ba690e7f7a4c1fd29b6acf96e70463548d4b85a Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Tue, 9 Jan 2024 20:04:57 +0800 Subject: [PATCH 171/187] drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable() [ Upstream commit 26db46bc9c675e43230cc6accd110110a7654299 ] The ps8640 bridge seems to expect everything to be power cycled at the disable process, but sometimes ps8640_aux_transfer() holds the runtime PM reference and prevents the bridge from suspend. Prevent that by introducing a mutex lock between ps8640_aux_transfer() and .post_disable() to make sure the bridge is really powered off. Fixes: 826cff3f7ebb ("drm/bridge: parade-ps8640: Enable runtime power management") Signed-off-by: Pin-yen Lin Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240109120528.1292601-1-treapking@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/parade-ps8640.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 146e1ad76223..3982568bd093 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -106,6 +106,7 @@ struct ps8640 { struct device_link *link; bool pre_enabled; bool need_post_hpd_delay; + struct mutex aux_lock; }; static const struct regmap_config ps8640_regmap_config[] = { @@ -353,6 +354,7 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev; int ret; + mutex_lock(&ps_bridge->aux_lock); pm_runtime_get_sync(dev); ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); if (ret) { @@ -362,6 +364,7 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + mutex_unlock(&ps_bridge->aux_lock); return ret; } @@ -481,7 +484,18 @@ static void ps8640_post_disable(struct drm_bridge *bridge) ps_bridge->pre_enabled = false; ps8640_bridge_vdo_control(ps_bridge, DISABLE); + + /* + * The bridge seems to expect everything to be power cycled at the + * disable process, so grab a lock here to make sure + * ps8640_aux_transfer() is not holding a runtime PM reference and + * preventing the bridge from suspend. + */ + mutex_lock(&ps_bridge->aux_lock); + pm_runtime_put_sync_suspend(&ps_bridge->page[PAGE0_DP_CNTL]->dev); + + mutex_unlock(&ps_bridge->aux_lock); } static int ps8640_bridge_attach(struct drm_bridge *bridge, @@ -662,6 +676,8 @@ static int ps8640_probe(struct i2c_client *client) if (!ps_bridge) return -ENOMEM; + mutex_init(&ps_bridge->aux_lock); + ps_bridge->supplies[0].supply = "vdd12"; ps_bridge->supplies[1].supply = "vdd33"; ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies), From c1cd4f9da526dfd34e5c4549aaaeec3fdd8c729e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 17 Jan 2024 10:35:03 -0800 Subject: [PATCH 172/187] drm/bridge: parade-ps8640: Make sure we drop the AUX mutex in the error case [ Upstream commit a20f1b02bafcbf5a32d96a1d4185d6981cf7d016 ] After commit 26db46bc9c67 ("drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable()"), if we hit the error case in ps8640_aux_transfer() then we return without dropping the mutex. Fix this oversight. Fixes: 26db46bc9c67 ("drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable()") Reviewed-by: Hsin-Yi Wang Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240117103502.1.Ib726a0184913925efc7e99c4d4fc801982e1bc24@changeid Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/parade-ps8640.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 3982568bd093..09737acc2cf4 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -359,11 +359,13 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); if (ret) { pm_runtime_put_sync_suspend(dev); - return ret; + goto exit; } ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + +exit: mutex_unlock(&ps_bridge->aux_lock); return ret; From ba930885bfd250047f6e788614e5042f939f3f69 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Dec 2023 13:32:15 +0100 Subject: [PATCH 173/187] drm/exynos: fix accidental on-stack copy of exynos_drm_plane [ Upstream commit 960b537e91725bcb17dd1b19e48950e62d134078 ] gcc rightfully complains about excessive stack usage in the fimd_win_set_pixfmt() function: drivers/gpu/drm/exynos/exynos_drm_fimd.c: In function 'fimd_win_set_pixfmt': drivers/gpu/drm/exynos/exynos_drm_fimd.c:750:1: error: the frame size of 1032 bytes is larger than 1024 byte drivers/gpu/drm/exynos/exynos5433_drm_decon.c: In function 'decon_win_set_pixfmt': drivers/gpu/drm/exynos/exynos5433_drm_decon.c:381:1: error: the frame size of 1032 bytes is larger than 1024 bytes There is really no reason to copy the large exynos_drm_plane structure to the stack before using one of its members, so just use a pointer instead. Fixes: 6f8ee5c21722 ("drm/exynos: fimd: Make plane alpha configurable") Signed-off-by: Arnd Bergmann Reviewed-by: Marek Szyprowski Signed-off-by: Inki Dae Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++-- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 8155d7e650f1..4bc5d9470acd 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -319,9 +319,9 @@ static void decon_win_set_bldmod(struct decon_context *ctx, unsigned int win, static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, struct drm_framebuffer *fb) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); unsigned int alpha = state->base.alpha; unsigned int pixel_alpha; unsigned long val; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index ae6636e6658e..529033b980b2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -662,9 +662,9 @@ static void fimd_win_set_bldmod(struct fimd_context *ctx, unsigned int win, static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, struct drm_framebuffer *fb, int width) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); uint32_t pixel_format = fb->format->format; unsigned int alpha = state->base.alpha; u32 val = WINCONx_ENWIN; From 23cf4cf4294f252d4ffbf25f1885eb23306e1557 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 20 Dec 2023 12:53:15 +0300 Subject: [PATCH 174/187] drm/exynos: gsc: minor fix for loop iteration in gsc_runtime_resume [ Upstream commit 4050957c7c2c14aa795dbf423b4180d5ac04e113 ] Do not forget to call clk_disable_unprepare() on the first element of ctx->clocks array. Found by Linux Verification Center (linuxtesting.org). Fixes: 8b7d3ec83aba ("drm/exynos: gsc: Convert driver to IPP v2 core API") Signed-off-by: Fedor Pchelkin Reviewed-by: Marek Szyprowski Signed-off-by: Inki Dae Signed-off-by: Sasha Levin --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 964dceb28c1e..68ea92742b06 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1342,7 +1342,7 @@ static int __maybe_unused gsc_runtime_resume(struct device *dev) for (i = 0; i < ctx->num_clocks; i++) { ret = clk_prepare_enable(ctx->clocks[i]); if (ret) { - while (--i > 0) + while (--i >= 0) clk_disable_unprepare(ctx->clocks[i]); return ret; } From 4b84411165f841b1feb7c21a4db72c8211e1aa58 Mon Sep 17 00:00:00 2001 From: Wenhua Lin Date: Tue, 9 Jan 2024 15:38:48 +0800 Subject: [PATCH 175/187] gpio: eic-sprd: Clear interrupt after set the interrupt type [ Upstream commit 84aef4ed59705585d629e81d633a83b7d416f5fb ] The raw interrupt status of eic maybe set before the interrupt is enabled, since the eic interrupt has a latch function, which would trigger the interrupt event once enabled it from user side. To solve this problem, interrupts generated before setting the interrupt trigger type are ignored. Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") Acked-by: Chunyan Zhang Signed-off-by: Wenhua Lin Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-eic-sprd.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index 8d722e026e9c..c2857ed0c78a 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -318,20 +318,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } break; default: return -ENOTSUPP; @@ -343,20 +350,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } break; default: return -ENOTSUPP; @@ -370,29 +384,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: @@ -405,29 +424,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: From 9564767b67f4152d345742e95d3e5ee9d0b02ed0 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Thu, 18 Jan 2024 21:04:01 +0800 Subject: [PATCH 176/187] block: Move checking GENHD_FL_NO_PART to bdev_add_partition() [ Upstream commit 7777f47f2ea64efd1016262e7b59fab34adfb869 ] Commit 1a721de8489f ("block: don't add or resize partition on the disk with GENHD_FL_NO_PART") prevented all operations about partitions on disks with GENHD_FL_NO_PART in blkpg_do_ioctl() since they are meaningless. However, it changed error code in some scenarios. So move checking GENHD_FL_NO_PART to bdev_add_partition() to eliminate impact. Fixes: 1a721de8489f ("block: don't add or resize partition on the disk with GENHD_FL_NO_PART") Reported-by: Allison Karlitskaya Closes: https://lore.kernel.org/all/CAOYeF9VsmqKMcQjo1k6YkGNujwN-nzfxY17N3F-CMikE1tYp+w@mail.gmail.com/ Signed-off-by: Li Lingfeng Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20240118130401.792757-1-lilingfeng@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/ioctl.c | 2 -- block/partitions/core.c | 5 +++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index ebe4a2653622..47567ba1185a 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -20,8 +20,6 @@ static int blkpg_do_ioctl(struct block_device *bdev, struct blkpg_partition p; sector_t start, length; - if (disk->flags & GENHD_FL_NO_PART) - return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) diff --git a/block/partitions/core.c b/block/partitions/core.c index b8112f52d388..3927f4283f6b 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -453,6 +453,11 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start, goto out; } + if (disk->flags & GENHD_FL_NO_PART) { + ret = -EINVAL; + goto out; + } + if (partition_overlaps(disk, start, length, -1)) { ret = -EBUSY; goto out; From a2fa86e2bb67769e2a53bfeb95956137069071dc Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Wed, 17 Jan 2024 17:58:14 -0800 Subject: [PATCH 177/187] drm/bridge: anx7625: Ensure bridge is suspended in disable() [ Upstream commit 4d5b7daa3c610af3f322ad1e91fc0c752ff32f0e ] Similar to commit 26db46bc9c67 ("drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable()"). Add a mutex to ensure that aux transfer won't race with atomic_disable by holding the PM reference and prevent the bridge from suspend. Also we need to use pm_runtime_put_sync_suspend() to suspend the bridge instead of idle with pm_runtime_put_sync(). Fixes: 3203e497eb76 ("drm/bridge: anx7625: Synchronously run runtime suspend.") Fixes: adca62ec370c ("drm/bridge: anx7625: Support reading edid through aux channel") Signed-off-by: Hsin-Yi Wang Tested-by: Xuxin Xiong Reviewed-by: Pin-yen Lin Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240118015916.2296741-1-hsinyi@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/analogix/anx7625.c | 7 ++++++- drivers/gpu/drm/bridge/analogix/anx7625.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index cf86cc05b7fc..5f8137e9cfd7 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1742,6 +1742,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, u8 request = msg->request & ~DP_AUX_I2C_MOT; int ret = 0; + mutex_lock(&ctx->aux_lock); pm_runtime_get_sync(dev); msg->reply = 0; switch (request) { @@ -1758,6 +1759,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, msg->size, msg->buffer); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + mutex_unlock(&ctx->aux_lock); return ret; } @@ -2454,7 +2456,9 @@ static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge, ctx->connector = NULL; anx7625_dp_stop(ctx); - pm_runtime_put_sync(dev); + mutex_lock(&ctx->aux_lock); + pm_runtime_put_sync_suspend(dev); + mutex_unlock(&ctx->aux_lock); } static enum drm_connector_status @@ -2648,6 +2652,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) mutex_init(&platform->lock); mutex_init(&platform->hdcp_wq_lock); + mutex_init(&platform->aux_lock); INIT_DELAYED_WORK(&platform->hdcp_work, hdcp_check_work_func); platform->hdcp_workqueue = create_workqueue("hdcp workqueue"); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index 14f33d6be289..239956199e1b 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -471,6 +471,8 @@ struct anx7625_data { struct workqueue_struct *hdcp_workqueue; /* Lock for hdcp work queue */ struct mutex hdcp_wq_lock; + /* Lock for aux transfer and disable */ + struct mutex aux_lock; char edid_block; struct display_timing dt; u8 display_timing_valid; From 598af91f622bab00342cd38f7514add9400d20cf Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Tue, 9 Jan 2024 16:00:32 -0500 Subject: [PATCH 178/187] spi: bcm-qspi: fix SFDP BFPT read by usig mspi read [ Upstream commit 574bf7bbe83794a902679846770f75a9b7f28176 ] SFDP read shall use the mspi reads when using the bcm_qspi_exec_mem_op() call. This fixes SFDP parameter page read failures seen with parts that now use SFDP protocol to read the basic flash parameter table. Fixes: 5f195ee7d830 ("spi: bcm-qspi: Implement the spi_mem interface") Signed-off-by: Kamal Dasu Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://msgid.link/r/20240109210033.43249-1-kamal.dasu@broadcom.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm-qspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 137e7315a3cf..b24955910147 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include "spi-bcm-qspi.h" @@ -1221,7 +1221,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, /* non-aligned and very short transfers are handled by MSPI */ if (!IS_ALIGNED((uintptr_t)addr, 4) || !IS_ALIGNED((uintptr_t)buf, 4) || - len < 4) + len < 4 || op->cmd.opcode == SPINOR_OP_RDSFDP) mspi_read = true; if (!has_bspi(qspi) || mspi_read) From b086f6d979773315426ed7aabe3be02127664d25 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 25 Jan 2024 14:53:09 -0600 Subject: [PATCH 179/187] spi: fix finalize message on error return [ Upstream commit 8c2ae772fe08e33f3d7a83849e85539320701abd ] In __spi_pump_transfer_message(), the message was not finalized in the first error return as it is in the other error return paths. Not finalizing the message could cause anything waiting on the message to complete to hang forever. This adds the missing call to spi_finalize_current_message(). Fixes: ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync") Signed-off-by: David Lechner Link: https://msgid.link/r/20240125205312.3458541-2-dlechner@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 22d227878bc4..19688f333e0b 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1636,6 +1636,10 @@ static int __spi_pump_transfer_message(struct spi_controller *ctlr, pm_runtime_put_noidle(ctlr->dev.parent); dev_err(&ctlr->dev, "Failed to power device: %d\n", ret); + + msg->status = ret; + spi_finalize_current_message(ctlr); + return ret; } } From 1111abee590ddc3f86e5fb119a9def756c217bb1 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Mon, 22 Jan 2024 19:47:09 +0100 Subject: [PATCH 180/187] MIPS: lantiq: register smp_ops on non-smp platforms [ Upstream commit 4bf2a626dc4bb46f0754d8ac02ec8584ff114ad5 ] Lantiq uses a common kernel config for devices with 24Kc and 34Kc cores. The changes made previously to add support for interrupts on all cores work on 24Kc platforms with SMP disabled and 34Kc platforms with SMP enabled. This patch fixes boot issues on Danube (single core 24Kc) with SMP enabled. Fixes: 730320fd770d ("MIPS: lantiq: enable all hardware interrupts on second VPE") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/lantiq/prom.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index be4829cc7a3a..28da4e720d17 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -114,10 +114,9 @@ void __init prom_init(void) prom_init_cmdline(); #if defined(CONFIG_MIPS_MT_SMP) - if (cpu_has_mipsmt) { - lantiq_smp_ops = vsmp_smp_ops; + lantiq_smp_ops = vsmp_smp_ops; + if (cpu_has_mipsmt) lantiq_smp_ops.init_secondary = lantiq_init_secondary; - register_smp_ops(&lantiq_smp_ops); - } + register_smp_ops(&lantiq_smp_ops); #endif } From ec745eeff4d2824e983f6f00bbff2a8a8003972b Mon Sep 17 00:00:00 2001 From: Quanquan Cao Date: Wed, 24 Jan 2024 17:15:26 +0800 Subject: [PATCH 181/187] =?UTF-8?q?cxl/region=EF=BC=9AFix=20overflow=20iss?= =?UTF-8?q?ue=20in=20alloc=5Fhpa()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d76779dd3681c01a4c6c3cae4d0627c9083e0ee6 upstream. Creating a region with 16 memory devices caused a problem. The div_u64_rem function, used for dividing an unsigned 64-bit number by a 32-bit one, faced an issue when SZ_256M * p->interleave_ways. The result surpassed the maximum limit of the 32-bit divisor (4G), leading to an overflow and a remainder of 0. note: At this point, p->interleave_ways is 16, meaning 16 * 256M = 4G To fix this issue, I replaced the div_u64_rem function with div64_u64_rem and adjusted the type of the remainder. Signed-off-by: Quanquan Cao Reviewed-by: Dave Jiang Fixes: 23a22cd1c98b ("cxl/region: Allocate HPA capacity to regions") Cc: Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 2f7187dbfa2d..5b7d848a6f01 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -450,7 +450,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_region_params *p = &cxlr->params; struct resource *res; - u32 remainder = 0; + u64 remainder = 0; lockdep_assert_held_write(&cxl_region_rwsem); @@ -470,7 +470,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) return -ENXIO; - div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder); + div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder); if (remainder) return -EINVAL; From e333bbb557dc3af79ee3f9f710ef03097bb91435 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 27 Jan 2024 05:05:57 +0800 Subject: [PATCH 182/187] mips: Call lose_fpu(0) before initializing fcr31 in mips_set_personality_nan commit 59be5c35850171e307ca5d3d703ee9ff4096b948 upstream. If we still own the FPU after initializing fcr31, when we are preempted the dirty value in the FPU will be read out and stored into fcr31, clobbering our setting. This can cause an improper floating-point environment after execve(). For example: zsh% cat measure.c #include int main() { return fetestexcept(FE_INEXACT); } zsh% cc measure.c -o measure -lm zsh% echo $((1.0/3)) # raising FE_INEXACT 0.33333333333333331 zsh% while ./measure; do ; done (stopped in seconds) Call lose_fpu(0) before setting fcr31 to prevent this. Closes: https://lore.kernel.org/linux-mips/7a6aa1bbdbbe2e63ae96ff163fab0349f58f1b9e.camel@xry111.site/ Fixes: 9b26616c8d9d ("MIPS: Respect the ISA level in FCSR handling") Cc: stable@vger.kernel.org Signed-off-by: Xi Ruoyao Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/elf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index 5582a4ca1e9e..7aa2c2360ff6 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_MIPS_FP_SUPPORT @@ -309,6 +310,11 @@ void mips_set_personality_nan(struct arch_elf_state *state) struct cpuinfo_mips *c = &boot_cpu_data; struct task_struct *t = current; + /* Do this early so t->thread.fpu.fcr31 won't be clobbered in case + * we are preempted before the lose_fpu(0) in start_thread. + */ + lose_fpu(0); + t->thread.fpu.fcr31 = c->fpu_csr31; switch (state->nan_2008) { case 0: From cf0b4ba4b0f1d5c66839e48c5a7ad1ec17461d4d Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Mon, 22 Jan 2024 15:35:34 -0800 Subject: [PATCH 183/187] tick/sched: Preserve number of idle sleeps across CPU hotplug events commit 9a574ea9069be30b835a3da772c039993c43369b upstream. Commit 71fee48f ("tick-sched: Fix idle and iowait sleeptime accounting vs CPU hotplug") preserved total idle sleep time and iowait sleeptime across CPU hotplug events. Similar reasoning applies to the number of idle calls and idle sleeps to get the proper average of sleep time per idle invocation. Preserve those fields too. Fixes: 71fee48f ("tick-sched: Fix idle and iowait sleeptime accounting vs CPU hotplug") Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240122233534.3094238-1-tim.c.chen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-sched.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1a5d02224d46..8cfdc6b978d7 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1557,6 +1557,7 @@ void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t idle_sleeptime, iowait_sleeptime; + unsigned long idle_calls, idle_sleeps; # ifdef CONFIG_HIGH_RES_TIMERS if (ts->sched_timer.base) @@ -1565,9 +1566,13 @@ void tick_cancel_sched_timer(int cpu) idle_sleeptime = ts->idle_sleeptime; iowait_sleeptime = ts->iowait_sleeptime; + idle_calls = ts->idle_calls; + idle_sleeps = ts->idle_sleeps; memset(ts, 0, sizeof(*ts)); ts->idle_sleeptime = idle_sleeptime; ts->iowait_sleeptime = iowait_sleeptime; + ts->idle_calls = idle_calls; + ts->idle_sleeps = idle_sleeps; } #endif From c02d3872c8dd53c2760520992b6ce527081553c4 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Wed, 10 Jan 2024 15:01:22 +0200 Subject: [PATCH 184/187] x86/entry/ia32: Ensure s32 is sign extended to s64 commit 56062d60f117dccfb5281869e0ab61e090baf864 upstream. Presently ia32 registers stored in ptregs are unconditionally cast to unsigned int by the ia32 stub. They are then cast to long when passed to __se_sys*, but will not be sign extended. This takes the sign of the syscall argument into account in the ia32 stub. It still casts to unsigned int to avoid implementation specific behavior. However then casts to int or unsigned int as necessary. So that the following cast to long sign extends the value. This fixes the io_pgetevents02 LTP test when compiled with -m32. Presently the systemcall io_pgetevents_time64() unexpectedly accepts -1 for the maximum number of events. It doesn't appear other systemcalls with signed arguments are effected because they all have compat variants defined and wired up. Fixes: ebeb8c82ffaf ("syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32") Suggested-by: Arnd Bergmann Signed-off-by: Richard Palethorpe Signed-off-by: Nikolay Borisov Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240110130122.3836513-1-nik.borisov@suse.com Link: https://lore.kernel.org/ltp/20210921130127.24131-1-rpalethorpe@suse.com/ Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/syscall_wrapper.h | 25 +++++++++++++++++++++---- include/linux/syscalls.h | 1 + 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index fd2669b1cb2d..e3323a9dc911 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -58,12 +58,29 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); ,,regs->di,,regs->si,,regs->dx \ ,,regs->r10,,regs->r8,,regs->r9) \ + +/* SYSCALL_PT_ARGS is Adapted from s390x */ +#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \ + SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp)) +#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \ + SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di)) +#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \ + SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si)) +#define SYSCALL_PT_ARG3(m, t1, t2, t3) \ + SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx)) +#define SYSCALL_PT_ARG2(m, t1, t2) \ + SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx)) +#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx)) +#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__) + +#define __SC_COMPAT_CAST(t, a) \ + (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \ + (unsigned int)a + /* Mapping of registers to parameters for syscalls on i386 */ #define SC_IA32_REGS_TO_ARGS(x, ...) \ - __MAP(x,__SC_ARGS \ - ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \ - ,,(unsigned int)regs->dx,,(unsigned int)regs->si \ - ,,(unsigned int)regs->di,,(unsigned int)regs->bp) + SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \ + __MAP(x, __SC_TYPE, __VA_ARGS__)) \ #define __SYS_STUB0(abi, name) \ long __##abi##_##name(const struct pt_regs *regs); \ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a34b0f9a9972..27a6ba1c0ec4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -123,6 +123,7 @@ enum landlock_rule_type; #define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (__force t) a +#define __SC_TYPE(t, a) t #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) From e90c7d26cab159b80b63ff520c54e0f4de02e1d6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 26 Sep 2023 21:41:28 -0700 Subject: [PATCH 185/187] serial: core: fix kernel-doc for uart_port_unlock_irqrestore() commit 29bff582b74ed0bdb7e6986482ad9e6799ea4d2f upstream. Fix the function name to avoid a kernel-doc warning: include/linux/serial_core.h:666: warning: expecting prototype for uart_port_lock_irqrestore(). Prototype was for uart_port_unlock_irqrestore() instead Fixes: b0af4bcb4946 ("serial: core: Provide port lock wrappers") Signed-off-by: Randy Dunlap Cc: Thomas Gleixner Cc: John Ogness Cc: linux-serial@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Jiri Slaby Reviewed-by: John Ogness Link: https://lore.kernel.org/r/20230927044128.4748-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 6055dbe5c12e..13bf20242b61 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -653,7 +653,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up) } /** - * uart_port_lock_irqrestore - Unlock the UART port, restore interrupts + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts * @up: Pointer to UART port structure * @flags: The saved interrupt flags for restore */ From d7dc6a860491f716389c9c876a10ed90bff45955 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 2 Dec 2022 22:14:55 +0200 Subject: [PATCH 186/187] net/mlx5e: Handle hardware IPsec limits events commit 8c582ddfbb473c1d799c40b5140aed81278e2837 upstream. Enable object changed event to signal IPsec about hitting soft and hard limits. Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert Cc: Florian Fainelli Cc: Naresh Kamboju [ only take the #ifdef movement portion of the commit to resolve some 6.1.y build errors - gregkh ] Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 16bcceec16c4..785f188148d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -34,7 +34,6 @@ #ifndef __MLX5E_IPSEC_H__ #define __MLX5E_IPSEC_H__ -#ifdef CONFIG_MLX5_EN_IPSEC #include #include @@ -146,6 +145,7 @@ struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_modify_state_work modify_work; }; +#ifdef CONFIG_MLX5_EN_IPSEC int mlx5e_ipsec_init(struct mlx5e_priv *priv); void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); From e5c3b988b827c76f52d0f62343e863b9133a0cd2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 31 Jan 2024 16:17:12 -0800 Subject: [PATCH 187/187] Linux 6.1.76 Link: https://lore.kernel.org/r/20240129165958.589924174@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Shuah Khan Tested-by: Allen Pais Tested-by: Florian Fainelli Tested-by: kernelci.org bot Tested-by: Sven Joachim Tested-by: Yann Sionneau Link: https://lore.kernel.org/r/20240130183318.454044155@linuxfoundation.org Tested-by: Salvatore Bonaccorso Tested-by: SeongJae Park Tested-by: Kelsey Steele Tested-by: Florian Fainelli Tested-by: Ron Economos Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Pavel Machek (CIP) Tested-by: Yann Sionneau Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7cd49d9eadbf..9dd167178ab4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 75 +SUBLEVEL = 76 EXTRAVERSION = NAME = Curry Ramen