diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata index 9ab0ef1dd1c7..299e0d1dc161 100644 --- a/Documentation/ABI/testing/sysfs-ata +++ b/Documentation/ABI/testing/sysfs-ata @@ -107,13 +107,14 @@ Description: described in ATA8 7.16 and 7.17. Only valid if the device is not a PM. - pio_mode: (RO) Transfer modes supported by the device when - in PIO mode. Mostly used by PATA device. + pio_mode: (RO) PIO transfer mode used by the device. + Mostly used by PATA devices. - xfer_mode: (RO) Current transfer mode + xfer_mode: (RO) Current transfer mode. Mostly used by + PATA devices. - dma_mode: (RO) Transfer modes supported by the device when - in DMA mode. Mostly used by PATA device. + dma_mode: (RO) DMA transfer mode used by the device. + Mostly used by PATA devices. class: (RO) Device class. Can be "ata" for disk, "atapi" for packet device, "pmp" for PM, or diff --git a/Documentation/ABI/testing/sysfs-bus-iio-vf610 b/Documentation/ABI/testing/sysfs-bus-iio-vf610 index 308a6756d3bf..491ead804488 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-vf610 +++ b/Documentation/ABI/testing/sysfs-bus-iio-vf610 @@ -1,4 +1,4 @@ -What: /sys/bus/iio/devices/iio:deviceX/conversion_mode +What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode KernelVersion: 4.2 Contact: linux-iio@vger.kernel.org Description: diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 1a9b6ac22c38..bd848ce0e6d6 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -519,6 +519,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/srbds /sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/itlb_multihit + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index ca4dbdd9016d..2adec1e6520a 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -15,3 +15,4 @@ are configurable at compile, boot or run time. tsx_async_abort multihit.rst special-register-buffer-data-sampling.rst + processor_mmio_stale_data.rst diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst new file mode 100644 index 000000000000..9393c50b5afc --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -0,0 +1,246 @@ +========================================= +Processor MMIO Stale Data Vulnerabilities +========================================= + +Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O +(MMIO) vulnerabilities that can expose data. The sequences of operations for +exposing data range from simple to very complex. Because most of the +vulnerabilities require the attacker to have access to MMIO, many environments +are not affected. System environments using virtualization where MMIO access is +provided to untrusted guests may need mitigation. These vulnerabilities are +not transient execution attacks. However, these vulnerabilities may propagate +stale data into core fill buffers where the data can subsequently be inferred +by an unmitigated transient execution attack. Mitigation for these +vulnerabilities includes a combination of microcode update and software +changes, depending on the platform and usage model. Some of these mitigations +are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or +those used to mitigate Special Register Buffer Data Sampling (SRBDS). + +Data Propagators +================ +Propagators are operations that result in stale data being copied or moved from +one microarchitectural buffer or register to another. Processor MMIO Stale Data +Vulnerabilities are operations that may result in stale data being directly +read into an architectural, software-visible state or sampled from a buffer or +register. + +Fill Buffer Stale Data Propagator (FBSDP) +----------------------------------------- +Stale data may propagate from fill buffers (FB) into the non-coherent portion +of the uncore on some non-coherent writes. Fill buffer propagation by itself +does not make stale data architecturally visible. Stale data must be propagated +to a location where it is subject to reading or sampling. + +Sideband Stale Data Propagator (SSDP) +------------------------------------- +The sideband stale data propagator (SSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. The sideband response buffer is +shared by all client cores. For non-coherent reads that go to sideband +destinations, the uncore logic returns 64 bytes of data to the core, including +both requested data and unrequested stale data, from a transaction buffer and +the sideband response buffer. As a result, stale data from the sideband +response and transaction buffers may now reside in a core fill buffer. + +Primary Stale Data Propagator (PSDP) +------------------------------------ +The primary stale data propagator (PSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. Similar to the sideband response +buffer, the primary response buffer is shared by all client cores. For some +processors, MMIO primary reads will return 64 bytes of data to the core fill +buffer including both requested data and unrequested stale data. This is +similar to the sideband stale data propagator. + +Vulnerabilities +=============== +Device Register Partial Write (DRPW) (CVE-2022-21166) +----------------------------------------------------- +Some endpoint MMIO registers incorrectly handle writes that are smaller than +the register size. Instead of aborting the write or only copying the correct +subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than +specified by the write transaction may be written to the register. On +processors affected by FBSDP, this may expose stale data from the fill buffers +of the core that created the write transaction. + +Shared Buffers Data Sampling (SBDS) (CVE-2022-21125) +---------------------------------------------------- +After propagators may have moved data around the uncore and copied stale data +into client core fill buffers, processors affected by MFBDS can leak data from +the fill buffer. It is limited to the client (including Intel Xeon server E3) +uncore implementation. + +Shared Buffers Data Read (SBDR) (CVE-2022-21123) +------------------------------------------------ +It is similar to Shared Buffer Data Sampling (SBDS) except that the data is +directly read into the architectural software-visible state. It is limited to +the client (including Intel Xeon server E3) uncore implementation. + +Affected Processors +=================== +Not all the CPUs are affected by all the variants. For instance, most +processors for the server market (excluding Intel Xeon E3 processors) are +impacted by only Device Register Partial Write (DRPW). + +Below is the list of affected Intel processors [#f1]_: + + =================== ============ ========= + Common name Family_Model Steppings + =================== ============ ========= + HASWELL_X 06_3FH 2,4 + SKYLAKE_L 06_4EH 3 + BROADWELL_X 06_4FH All + SKYLAKE_X 06_55H 3,4,6,7,11 + BROADWELL_D 06_56H 3,4,5 + SKYLAKE 06_5EH 3 + ICELAKE_X 06_6AH 4,5,6 + ICELAKE_D 06_6CH 1 + ICELAKE_L 06_7EH 5 + ATOM_TREMONT_D 06_86H All + LAKEFIELD 06_8AH 1 + KABYLAKE_L 06_8EH 9 to 12 + ATOM_TREMONT 06_96H 1 + ATOM_TREMONT_L 06_9CH 0 + KABYLAKE 06_9EH 9 to 13 + COMETLAKE 06_A5H 2,3,5 + COMETLAKE_L 06_A6H 0,1 + ROCKETLAKE 06_A7H 1 + =================== ============ ========= + +If a CPU is in the affected processor list, but not affected by a variant, it +is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later +section, mitigation largely remains the same for all the variants, i.e. to +clear the CPU fill buffers via VERW instruction. + +New bits in MSRs +================ +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +specific variants of Processor MMIO Stale Data vulnerabilities and mitigation +capability. + +MSR IA32_ARCH_CAPABILITIES +-------------------------- +Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the + Shared Buffers Data Read (SBDR) vulnerability or the sideband stale + data propagator (SSDP). +Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer + Stale Data Propagator (FBSDP). +Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data + Propagator (PSDP). +Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer + values as part of MD_CLEAR operations. Processors that do not + enumerate MDS_NO (meaning they are affected by MDS) but that do + enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate + FB_CLEAR as part of their MD_CLEAR support. +Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR + IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS + bit can be set to cause the VERW instruction to not perform the + FB_CLEAR action. Not all processors that support FB_CLEAR will support + FB_CLEAR_CTRL. + +MSR IA32_MCU_OPT_CTRL +--------------------- +Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR +action. This may be useful to reduce the performance impact of FB_CLEAR in +cases where system software deems it warranted (for example, when performance +is more critical, or the untrusted software has no MMIO access). Note that +FB_CLEAR_DIS has no impact on enumeration (for example, it does not change +FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors +that enumerate FB_CLEAR. + +Mitigation +========== +Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the +same mitigation strategy to force the CPU to clear the affected buffers before +an attacker can extract the secrets. + +This is achieved by using the otherwise unused and obsolete VERW instruction in +combination with a microcode update. The microcode clears the affected CPU +buffers when the VERW instruction is executed. + +Kernel reuses the MDS function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + +On MDS affected CPUs, the kernel already invokes CPU buffer clear on +kernel/userspace, hypervisor/guest and C-state (idle) transitions. No +additional mitigation is needed on such CPUs. + +For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker +with MMIO capability. Therefore, VERW is not required for kernel/userspace. For +virtualization case, VERW is only needed at VMENTER for a guest with MMIO +capability. + +Mitigation points +----------------- +Return to user space +^^^^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation +needed. + +C-State transition +^^^^^^^^^^^^^^^^^^ +Control register writes by CPU during C-state transition can propagate data +from fill buffer to uncore buffers. Execute VERW before C-state transition to +clear CPU fill buffers. + +Guest entry point +^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise +execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by +MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO +Stale Data vulnerabilities, so there is no need to execute VERW for such guests. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control the Processor MMIO Stale Data +mitigations at boot time with the option "mmio_stale_data=". The valid +arguments for this option are: + + ========== ================================================================= + full If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and when entering a VM. Idle transitions are + protected as well. It does not automatically disable SMT. + full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the + complete mitigation. + off Disables mitigation completely. + ========== ================================================================= + +If the CPU is affected and mmio_stale_data=off is not supplied on the kernel +command line, then the kernel selects the appropriate mitigation. + +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The processor is vulnerable, but microcode is not updated. The + mitigation is enabled on a best effort basis. + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +If the processor is vulnerable then the following information is appended to +the above information: + + ======================== =========================================== + 'SMT vulnerable' SMT is enabled + 'SMT disabled' SMT is disabled + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown + ======================== =========================================== + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 6bd97cd50d62..7e061ed449aa 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -422,6 +422,14 @@ The possible values in this file are: 'RSB filling' Protection of RSB on context switch enabled ============= =========================================== + - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status: + + =========================== ======================================================= + 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled + 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable + 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB + =========================== ======================================================= + Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will report vulnerability. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ce9e964d7bfb..f7c921b029f6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2932,6 +2932,8 @@ kvm.nx_huge_pages=off [X86] no_entry_flush [PPC] no_uaccess_flush [PPC] + mmio_stale_data=off [X86] + retbleed=off [X86] Exceptions: This does not have any effect on @@ -2953,6 +2955,8 @@ Equivalent to: l1tf=flush,nosmt [X86] mds=full,nosmt [X86] tsx_async_abort=full,nosmt [X86] + mmio_stale_data=full,nosmt [X86] + retbleed=auto,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this @@ -2962,6 +2966,40 @@ log everything. Information is printed at KERN_DEBUG so loglevel=8 may also need to be specified. + mmio_stale_data= + [X86,INTEL] Control mitigation for the Processor + MMIO Stale Data vulnerabilities. + + Processor MMIO Stale Data is a class of + vulnerabilities that may expose data after an MMIO + operation. Exposed data could originate or end in + the same CPU buffers as affected by MDS and TAA. + Therefore, similar to MDS and TAA, the mitigation + is to clear the affected CPU buffers. + + This parameter controls the mitigation. The + options are: + + full - Enable mitigation on vulnerable CPUs + + full,nosmt - Enable mitigation and disable SMT on + vulnerable CPUs. + + off - Unconditionally disable mitigation + + On MDS or TAA affected machines, + mmio_stale_data=off can be prevented by an active + MDS or TAA mitigation as these vulnerabilities are + mitigated with the same mechanism so in order to + disable this mitigation, you need to specify + mds=off and tsx_async_abort=off too. + + Not specifying this option is equivalent to + mmio_stale_data=full. + + For details see: + Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst + module.sig_enforce [KNL] When CONFIG_MODULE_SIG is set, this means that modules without (valid) signatures will fail to load. @@ -4136,6 +4174,12 @@ fully seed the kernel's CRNG. Default is controlled by CONFIG_RANDOM_TRUST_CPU. + random.trust_bootloader={on,off} + [KNL] Enable or disable trusting the use of a + seed passed by the bootloader (if available) to + fully seed the kernel's CRNG. Default is controlled + by CONFIG_RANDOM_TRUST_BOOTLOADER. + ras=option[,option,...] [KNL] RAS-specific options cec_disable [X86] @@ -4715,6 +4759,30 @@ retain_initrd [RAM] Keep initrd memory after extraction + retbleed= [X86] Control mitigation of RETBleed (Arbitrary + Speculative Code Execution with Return Instructions) + vulnerability. + + off - no mitigation + auto - automatically select a migitation + auto,nosmt - automatically select a mitigation, + disabling SMT if necessary for + the full mitigation (only on Zen1 + and older without STIBP). + ibpb - mitigate short speculation windows on + basic block boundaries too. Safe, highest + perf impact. + unret - force enable untrained return thunks, + only effective on AMD f15h-f17h + based systems. + unret,nosmt - like unret, will disable SMT when STIBP + is not available. + + Selecting 'auto' will choose a mitigation method at run + time according to the CPU. + + Not specifying this option is equivalent to retbleed=auto. + rfkill.default_state= 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, etc. communication is blocked by default. @@ -5064,6 +5132,7 @@ eibrs - enhanced IBRS eibrs,retpoline - enhanced IBRS + Retpolines eibrs,lfence - enhanced IBRS + LFENCE + ibrs - use IBRS to protect kernel Not specifying this option is equivalent to spectre_v2=auto. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index e338306f4587..a4b1ebc2e70b 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1006,28 +1006,22 @@ This is a directory, with the following entries: * ``boot_id``: a UUID generated the first time this is retrieved, and unvarying after that; +* ``uuid``: a UUID generated every time this is retrieved (this can + thus be used to generate UUIDs at will); + * ``entropy_avail``: the pool's entropy count, in bits; * ``poolsize``: the entropy pool size, in bits; * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum - number of seconds between urandom pool reseeding). - -* ``uuid``: a UUID generated every time this is retrieved (this can - thus be used to generate UUIDs at will); + number of seconds between urandom pool reseeding). This file is + writable for compatibility purposes, but writing to it has no effect + on any RNG behavior; * ``write_wakeup_threshold``: when the entropy count drops below this (as a number of bits), processes waiting to write to ``/dev/random`` - are woken up. - -If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH`` -defined, these additional entries are present: - -* ``add_interrupt_avg_cycles``: the average number of cycles between - interrupts used to feed the pool; - -* ``add_interrupt_avg_deviation``: the standard deviation seen on the - number of cycles between interrupts used to feed the pool. + are woken up. This file is writable for compatibility purposes, but + writing to it has no effect on any RNG behavior. randomize_va_space diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 805c073fd5e6..4ff8a9379ba9 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -166,6 +166,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 | +----------------+-----------------+-----------------+-----------------------------+ +| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 | ++----------------+-----------------+-----------------+-----------------------------+ + +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/conf.py b/Documentation/conf.py index ed2b43ec7754..f47c3f1682db 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -176,7 +176,7 @@ finally: # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst index 17706dc91ec9..1887d92e8e92 100644 --- a/Documentation/core-api/dma-attributes.rst +++ b/Documentation/core-api/dma-attributes.rst @@ -130,11 +130,3 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). - -DMA_ATTR_OVERWRITE ------------------- - -This is a hint to the DMA-mapping subsystem that the device is expected to -overwrite the entire mapped size, thus the caller does not require any of the -previous buffer contents to be preserved. This allows bounce-buffering -implementations to optimise DMA_FROM_DEVICE transfers. diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml index 0cebaaefda03..419c3b2ac5a6 100644 --- a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml +++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml @@ -72,6 +72,7 @@ examples: dc-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>; rotation = <270>; + backlight = <&backlight>; }; }; diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml index 372679dbd216..7e250ce136ee 100644 --- a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml +++ b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml @@ -61,7 +61,7 @@ if: then: properties: clocks: - maxItems: 2 + minItems: 2 required: - clock-names diff --git a/Documentation/devicetree/bindings/gpio/gpio-altera.txt b/Documentation/devicetree/bindings/gpio/gpio-altera.txt index 146e554b3c67..2a80e272cd66 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-altera.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-altera.txt @@ -9,8 +9,9 @@ Required properties: - The second cell is reserved and is currently unused. - gpio-controller : Marks the device node as a GPIO controller. - interrupt-controller: Mark the device node as an interrupt controller -- #interrupt-cells : Should be 1. The interrupt type is fixed in the hardware. +- #interrupt-cells : Should be 2. The interrupt type is fixed in the hardware. - The first cell is the GPIO offset number within the GPIO controller. + - The second cell is the interrupt trigger type and level flags. - interrupts: Specify the interrupt. - altr,interrupt-type: Specifies the interrupt trigger type the GPIO hardware is synthesized. This field is required if the Altera GPIO controller @@ -38,6 +39,6 @@ gpio_altr: gpio@ff200000 { altr,interrupt-type = ; #gpio-cells = <2>; gpio-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; interrupt-controller; }; diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml index c78ab7e2eee7..fa83c69820f8 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml @@ -58,7 +58,7 @@ patternProperties: $ref: "/schemas/types.yaml#/definitions/string" enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4, - EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP, + EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml index ef5698f426b2..392204a08e96 100644 --- a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml +++ b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml @@ -45,6 +45,7 @@ properties: maxItems: 2 interconnect-names: + minItems: 1 items: - const: qspi-config - const: qspi-memory diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 40278344dc48..eedce7474ad3 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -988,7 +988,7 @@ cipso_cache_enable - BOOLEAN cipso_cache_bucket_size - INTEGER The CIPSO label cache consists of a fixed size hash table with each hash bucket containing a number of cache entries. This variable limits - the number of entries in each hash bucket; the larger the value the + the number of entries in each hash bucket; the larger the value is, the more CIPSO label mappings that can be cached. When the number of entries in a given hash bucket reaches this limit adding new entries causes the oldest entry in the bucket to be removed to make room. @@ -1093,7 +1093,7 @@ ip_autobind_reuse - BOOLEAN option should only be set by experts. Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log message will be printed when dynamic address rewriting @@ -2642,7 +2642,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max Default: 4K sctp_wmem - vector of 3 INTEGERs: min, default, max - Currently this tunable has no effect. + Only the first value ("min") is used, "default" and "max" are + ignored. + + min: Minimum size of send buffer that can be used by SCTP sockets. + It is guaranteed to each SCTP socket (but not association) even + under moderate memory pressure. + + Default: 4K addr_scope_policy - INTEGER Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 5a85fcc80c76..557b97483437 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -10,18 +10,177 @@ Introduction The following is a random collection of documentation regarding network devices. -struct net_device allocation rules -================================== +struct net_device lifetime rules +================================ Network device structures need to persist even after module is unloaded and must be allocated with alloc_netdev_mqs() and friends. If device has registered successfully, it will be freed on last use -by free_netdev(). This is required to handle the pathologic case cleanly -(example: rmmod mydriver needs_free_netdev = true; + } + + static void my_destructor(struct net_device *dev) + { + some_obj_destroy(priv->obj); + some_uninit(priv); + } + + int create_link() + { + struct my_device_priv *priv; + int err; + + ASSERT_RTNL(); + + dev = alloc_netdev(sizeof(*priv), "net%d", NET_NAME_UNKNOWN, my_setup); + if (!dev) + return -ENOMEM; + priv = netdev_priv(dev); + + /* Implicit constructor */ + err = some_init(priv); + if (err) + goto err_free_dev; + + priv->obj = some_obj_create(); + if (!priv->obj) { + err = -ENOMEM; + goto err_some_uninit; + } + /* End of constructor, set the destructor: */ + dev->priv_destructor = my_destructor; + + err = register_netdevice(dev); + if (err) + /* register_netdevice() calls destructor on failure */ + goto err_free_dev; + + /* If anything fails now unregister_netdevice() (or unregister_netdev()) + * will take care of calling my_destructor and free_netdev(). + */ + + return 0; + + err_some_uninit: + some_uninit(priv); + err_free_dev: + free_netdev(dev); + return err; + } + +If struct net_device.priv_destructor is set it will be called by the core +some time after unregister_netdevice(), it will also be called if +register_netdevice() fails. The callback may be invoked with or without +``rtnl_lock`` held. + +There is no explicit constructor callback, driver "constructs" the private +netdev state after allocating it and before registration. + +Setting struct net_device.needs_free_netdev makes core call free_netdevice() +automatically after unregister_netdevice() when all references to the device +are gone. It only takes effect after a successful call to register_netdevice() +so if register_netdevice() fails driver is responsible for calling +free_netdev(). + +free_netdev() is safe to call on error paths right after unregister_netdevice() +or when register_netdevice() fails. Parts of netdev (de)registration process +happen after ``rtnl_lock`` is released, therefore in those cases free_netdev() +will defer some of the processing until ``rtnl_lock`` is released. + +Devices spawned from struct rtnl_link_ops should never free the +struct net_device directly. + +.ndo_init and .ndo_uninit +~~~~~~~~~~~~~~~~~~~~~~~~~ + +``.ndo_init`` and ``.ndo_uninit`` callbacks are called during net_device +registration and de-registration, under ``rtnl_lock``. Drivers can use +those e.g. when parts of their init process need to run under ``rtnl_lock``. + +``.ndo_init`` runs before device is visible in the system, ``.ndo_uninit`` +runs during de-registering after device is closed but other subsystems +may still have outstanding references to the netdevice. MTU === diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 5a267f5d1a50..edd263e0992d 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -71,7 +71,7 @@ as you intend it to. The maintainer will thank you if you write your patch description in a form which can be easily pulled into Linux's source code management -system, ``git``, as a "commit log". See :ref:`explicit_in_reply_to`. +system, ``git``, as a "commit log". See :ref:`the_canonical_patch_format`. Solve only one problem per patch. If your description starts to get long, that's a sign that you probably need to split up your patch. diff --git a/MAINTAINERS b/MAINTAINERS index e492b0e3a4c5..007bdd1bc2f2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14702,6 +14702,8 @@ F: arch/mips/generic/board-ranchu.c RANDOM NUMBER DRIVER M: "Theodore Ts'o" +M: Jason A. Donenfeld +T: git https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git S: Maintained F: drivers/char/random.c @@ -19277,7 +19279,8 @@ F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* XFS FILESYSTEM -M: Darrick J. Wong +M: Amir Goldstein +M: Darrick J. Wong M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org S: Supported diff --git a/Makefile b/Makefile index 590527068664..d32763b3d78e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 117 +SUBLEVEL = 136 EXTRAVERSION = NAME = Dare mighty things @@ -688,12 +688,21 @@ ifdef CONFIG_FUNCTION_TRACER CC_FLAGS_FTRACE := -pg endif -RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register -RETPOLINE_VDSO_CFLAGS_GCC := -mindirect-branch=thunk-inline -mindirect-branch-register -RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk -RETPOLINE_VDSO_CFLAGS_CLANG := -mretpoline -RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) -RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_VDSO_CFLAGS_CLANG))) +ifdef CONFIG_CC_IS_GCC +RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) +RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) +RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) +endif +ifdef CONFIG_CC_IS_CLANG +RETPOLINE_CFLAGS := -mretpoline-external-thunk +RETPOLINE_VDSO_CFLAGS := -mretpoline +endif + +ifdef CONFIG_RETHUNK +RETHUNK_CFLAGS := -mfunction-return=thunk-extern +RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) +endif + export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS @@ -1254,7 +1263,7 @@ KBUILD_MODULES := 1 autoksyms_recursive: descend modules.order $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ - "$(MAKE) -f $(srctree)/Makefile vmlinux" + "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive" endif autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index dee2bfb4bd31..9df93bbe19c7 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -4547,6 +4547,7 @@ + @@ -7108,7 +7109,7 @@ - + @@ -7351,7 +7352,17 @@ - + + + + + + + + + + + @@ -8253,7 +8264,7 @@ - + @@ -8676,15 +8687,15 @@ - + - + - + - + @@ -9468,6 +9479,14 @@ + + + + + + + + @@ -10754,6 +10773,11 @@ + + + + + @@ -11936,18 +11960,18 @@ - + - + - + - + - + @@ -12986,7 +13010,7 @@ - + @@ -14741,42 +14765,42 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -15173,110 +15197,115 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -15653,6 +15682,7 @@ + @@ -15717,45 +15747,45 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -16059,33 +16089,33 @@ - + - + - + - + - + - + - + - + - + - + @@ -16938,6 +16968,14 @@ + + + + + + + + @@ -18054,27 +18092,27 @@ - + - + - + - + - + - + - + - + @@ -18485,6 +18523,7 @@ + @@ -18680,42 +18719,42 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -18799,6 +18838,7 @@ + @@ -19640,7 +19680,7 @@ - + @@ -21182,6 +21222,10 @@ + + + + @@ -21673,6 +21717,11 @@ + + + + + @@ -24637,6 +24686,14 @@ + + + + + + + + @@ -24909,6 +24966,10 @@ + + + + @@ -25186,6 +25247,11 @@ + + + + + @@ -25846,6 +25912,7 @@ + @@ -28088,100 +28155,100 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -30207,15 +30274,15 @@ - + - + - + - + @@ -31842,6 +31909,7 @@ + @@ -32221,6 +32289,7 @@ + @@ -33869,72 +33938,72 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -34278,12 +34347,12 @@ - + - + - + @@ -35356,15 +35425,15 @@ - + - + - + - + @@ -37083,7 +37152,7 @@ - + @@ -37582,7 +37651,7 @@ - + @@ -37941,153 +38010,153 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -38412,51 +38481,51 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -38708,7 +38777,7 @@ - + @@ -40338,6 +40407,26 @@ + + + + + + + + + + + + + + + + + + + + @@ -43097,39 +43186,39 @@ - + - + - + - + - + - + - + - + - + - + - + - + @@ -44091,23 +44180,7 @@ - - - - - - - - - - - - - - - - - + @@ -44869,7 +44942,7 @@ - + @@ -48399,7 +48472,6 @@ - @@ -48486,9 +48558,9 @@ - + - + @@ -48542,7 +48614,7 @@ - + @@ -50307,7 +50379,7 @@ - + @@ -50673,6 +50745,7 @@ + @@ -53139,6 +53212,7 @@ + @@ -53281,13 +53355,6 @@ - - - - - - - @@ -54076,6 +54143,17 @@ + + + + + + + + + + + @@ -56567,39 +56645,39 @@ - + - + - + - + - + - + - + - + - + - + - + - + @@ -60589,24 +60667,24 @@ - + - + - + - + - + - + - + @@ -61673,6 +61751,11 @@ + + + + + @@ -61747,6 +61830,20 @@ + + + + + + + + + + + + + + @@ -61785,12 +61882,12 @@ - + - + - + @@ -62426,21 +62523,21 @@ - + - + - + - + - + - + @@ -62967,7 +63064,7 @@ - + @@ -63614,7 +63711,7 @@ - + @@ -63701,102 +63798,102 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -64421,27 +64518,27 @@ - + - + - + - + - + - + - + - + @@ -65457,7 +65554,7 @@ - + @@ -65835,13 +65932,6 @@ - - - - - - - @@ -70777,6 +70867,7 @@ + @@ -72557,12 +72648,12 @@ - + - + - + @@ -72638,6 +72729,12 @@ + + + + + + @@ -73076,7 +73173,7 @@ - + @@ -73322,7 +73419,7 @@ - + @@ -73591,7 +73688,7 @@ - + @@ -78830,6 +78927,17 @@ + + + + + + + + + + + @@ -78929,7 +79037,7 @@ - + @@ -79154,21 +79262,21 @@ - + - + - + - + - + - + @@ -82105,24 +82213,24 @@ - + - + - + - + - + - + - + @@ -82999,9 +83107,6 @@ - - - @@ -85356,6 +85461,26 @@ + + + + + + + + + + + + + + + + + + + + @@ -85707,72 +85832,72 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -87936,12 +88061,12 @@ - + - + - + @@ -88710,45 +88835,45 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -90301,6 +90426,7 @@ + @@ -90391,7 +90517,6 @@ - @@ -91942,9 +92067,6 @@ - - - @@ -92102,51 +92224,51 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -93041,99 +93163,99 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -97614,51 +97736,51 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -98076,12 +98198,12 @@ - + - + - + @@ -99729,15 +99851,15 @@ - + - + - + - + @@ -101063,7 +101185,7 @@ - + @@ -101805,6 +101927,26 @@ + + + + + + + + + + + + + + + + + + + + @@ -102047,12 +102189,6 @@ - - - - - - @@ -103068,6 +103204,7 @@ + @@ -104737,15 +104874,15 @@ - + - + - + - + @@ -105166,20 +105303,7 @@ - - - - - - - - - - - - - - + @@ -108966,6 +109090,11 @@ + + + + + @@ -110284,7 +110413,6 @@ - @@ -111472,15 +111600,15 @@ - + - + - + - + @@ -111654,87 +111782,87 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -112298,7 +112426,7 @@ - + @@ -113764,33 +113892,33 @@ - + - + - + - + - + - + - + - + - + - + @@ -115331,11 +115459,11 @@ - + - - + + @@ -115362,11 +115490,11 @@ - - - - - + + + + + @@ -115461,9 +115589,9 @@ - - - + + + @@ -115656,55 +115784,55 @@ - - - - + + + + - - - + + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - + + + + - - - - + + + + - - - - + + + + @@ -115714,9 +115842,9 @@ - - - + + + @@ -115855,21 +115983,21 @@ - - - - + + + + - - - - + + + + - - - + + + @@ -115996,9 +116124,9 @@ - - - + + + @@ -116051,9 +116179,9 @@ - - - + + + @@ -116367,9 +116495,9 @@ - - - + + + @@ -116471,12 +116599,12 @@ - - + + - - + + @@ -116494,8 +116622,8 @@ - - + + @@ -116546,7 +116674,7 @@ - + @@ -116564,13 +116692,13 @@ - - - + + + - - + + @@ -116638,34 +116766,34 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + @@ -116805,9 +116933,9 @@ - - - + + + @@ -116922,10 +117050,10 @@ - - - - + + + + @@ -120144,10 +120272,10 @@ - - - - + + + + @@ -120320,11 +120448,11 @@ - - - - - + + + + + @@ -120363,48 +120491,48 @@ - - - - - - - + - - - - - - - + - + + + + + + + + + + + + + + + + + + + - + - - - - - - @@ -120546,12 +120674,12 @@ - - - - - - + + + + + + @@ -120564,13 +120692,13 @@ - - + + - - - + + + @@ -120586,8 +120714,8 @@ - - + + @@ -120636,9 +120764,9 @@ - - - + + + @@ -120747,13 +120875,13 @@ - - - - - - - + + + + + + + @@ -120762,9 +120890,9 @@ - - - + + + @@ -120800,9 +120928,9 @@ - - - + + + @@ -120846,7 +120974,7 @@ - + @@ -121316,16 +121444,16 @@ - - + + - - + + - - + + @@ -121333,22 +121461,22 @@ - - - + + + - - - + + + - - + + - - + + @@ -121364,15 +121492,15 @@ - - + + - - - - - + + + + + @@ -121389,23 +121517,23 @@ - - - + + + - - + + - - - + + + - - - + + + @@ -121428,22 +121556,22 @@ - - + + - - - + + + - - + + - - - + + + @@ -121468,9 +121596,9 @@ - - - + + + @@ -121483,10 +121611,10 @@ - - - - + + + + @@ -121808,10 +121936,10 @@ - - - - + + + + @@ -121828,7 +121956,7 @@ - + @@ -121955,8 +122083,8 @@ - - + + @@ -122009,12 +122137,12 @@ - - + + - - + + @@ -122050,13 +122178,13 @@ - - + + - - - + + + @@ -122064,13 +122192,13 @@ - - + + - - - + + + @@ -122091,9 +122219,9 @@ - - - + + + @@ -122951,16 +123079,16 @@ - - + + - + - + @@ -122988,10 +123116,10 @@ - + - + @@ -123184,7 +123312,7 @@ - + @@ -123242,10 +123370,10 @@ - + - + @@ -123854,26 +123982,26 @@ - - - - + + + + - - + + - - - - + + + + - - - - + + + + @@ -124107,8 +124235,8 @@ - - + + @@ -124153,16 +124281,16 @@ - - - - + + + + - - - - + + + + @@ -124186,20 +124314,20 @@ - - + + - - - - + + + + - - - + + + @@ -124226,9 +124354,9 @@ - - - + + + @@ -124558,38 +124686,38 @@ - - - - + + + + - - - - + + + + - - + + - - - - + + + + - - - + + + - - - + + + @@ -124607,11 +124735,11 @@ - - - - - + + + + + @@ -124716,8 +124844,8 @@ - - + + @@ -124726,67 +124854,67 @@ - - - + + + - - + + - - + + - - - - - - + + + + + + - - - + + + - - - + + + - - - - - - - + + + + + + + - - + + - - - + + + - - - - + + + + - - - - + + + + @@ -124826,38 +124954,38 @@ - - + + - - - - + + + + - - + + - - - + + + - - - + + + - - - + + + - - - + + + @@ -124903,42 +125031,42 @@ - - + + - - + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + @@ -124951,34 +125079,34 @@ - - - - + + + + - - - - + + + + - - - - - + + + + + - - - - - + + + + + - - + + @@ -125107,19 +125235,19 @@ - - - + + + - - - + + + - - - + + + @@ -125268,9 +125396,9 @@ - - - + + + @@ -125306,8 +125434,8 @@ - - + + @@ -125505,9 +125633,9 @@ - - - + + + @@ -125604,20 +125732,20 @@ - - - - - - - - + + + + + + + + - - - - + + + + @@ -125724,15 +125852,15 @@ - - + + - - - - - + + + + + @@ -126533,9 +126661,9 @@ - - - + + + @@ -126547,157 +126675,157 @@ - - + + - - + + - - - + + + - - + + - - - + + + - - + + - - - + + + - - + + - - - - + + + + - - + + - - - + + + - - + + - - - - + + + + - - - - + + + + - - - + + + - - - + + + - - + + - - + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - - - - + + + + + - - + + - - - + + + - - - - + + + + - - - - + + + + - - - + + + - - - + + + - - + + @@ -126804,13 +126932,13 @@ - - - - - - - + + + + + + + @@ -126841,52 +126969,52 @@ - - - + + + - - - - + + + + - - + + - - - + + + - - + + - - - + + + - - - + + + - - - - + + + + - - + + - - + + @@ -126930,45 +127058,45 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - + + - - - - - - - + + + + + + + - - - - - - + + + + + + @@ -126984,77 +127112,77 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - + + - - + + - - - + + + - - - + + + - - - - - - - - - - - - - + + + + + + + + + + + + + - - + + - - - - + + + + - - - + + + - - - + + + @@ -127674,9 +127802,9 @@ - - - + + + @@ -127704,17 +127832,17 @@ - - + + - - - - - - - + + + + + + + @@ -129225,10 +129353,10 @@ - - - - + + + + @@ -129243,8 +129371,8 @@ - - + + @@ -129563,17 +129691,17 @@ - - + + - - - + + + - - + + @@ -129862,10 +129990,10 @@ - - - - + + + + @@ -129918,18 +130046,18 @@ - - + + - - - + + + - - - + + + @@ -130241,8 +130369,8 @@ - - + + @@ -130683,25 +130811,25 @@ - - - + + + - - - + + + - - + + - - - - - + + + + + @@ -130776,20 +130904,20 @@ - - - + + + - - - + + + - + - + @@ -130877,12 +131005,12 @@ - - + + - - + + @@ -131246,8 +131374,8 @@ - - + + @@ -131511,12 +131639,12 @@ - - + + - - + + @@ -131908,8 +132036,8 @@ - - + + @@ -131919,8 +132047,8 @@ - - + + @@ -132268,8 +132396,8 @@ - - + + @@ -132349,22 +132477,22 @@ - - + + - + - - + + - - - - - + + + + + @@ -132382,8 +132510,8 @@ - - + + @@ -132428,35 +132556,35 @@ - - + + - - + + - - + + - - + + - - - - - - - + + + + + + + - - - - + + + + @@ -132464,9 +132592,9 @@ - - - + + + @@ -132474,16 +132602,16 @@ - - + + - - + + - - + + @@ -133428,25 +133556,25 @@ - - - - + + + + - - - - - + + + + + - - + + @@ -133962,51 +134090,51 @@ - + - - + + - - + + - - + + - + - + - - + + - + - - + + - + - - + + - - + + - - + + @@ -134015,8 +134143,8 @@ - - + + @@ -134024,10 +134152,10 @@ - - - - + + + + @@ -135259,12 +135387,12 @@ - - - - - - + + + + + + @@ -135337,9 +135465,9 @@ - - - + + + @@ -135347,22 +135475,22 @@ - - + + - - - + + + - - - + + + - - + + @@ -135393,9 +135521,9 @@ - - - + + + @@ -135403,88 +135531,88 @@ - - + + - - - + + + - - - - + + + + - - - + + + - - - + + + - - - - - - + + + + + + - - - - + + + + - - - - + + + + - - + + - - - + + + - - + + - - - - + + + + - - - + + + - - - + + + @@ -135504,31 +135632,31 @@ - - - - - + + + + + - - + + - - + + - - + + - - + + - - + + @@ -135551,13 +135679,13 @@ - - - + + + - - + + @@ -136684,14 +136812,14 @@ - - - - + + + + - - + + @@ -136700,12 +136828,12 @@ - - + + - - + + @@ -136877,22 +137005,22 @@ - - - - + + + + - - + + - - + + @@ -136923,12 +137051,12 @@ - - + + - - + + @@ -136943,8 +137071,8 @@ - - + + @@ -136964,17 +137092,17 @@ - - + + - - - + + + @@ -136996,8 +137124,8 @@ - - + + @@ -137118,9 +137246,9 @@ - - - + + + @@ -137214,13 +137342,13 @@ - - - + + + - - + + @@ -137228,9 +137356,9 @@ - - - + + + @@ -137247,29 +137375,29 @@ - - - - + + + + - - - + + + - - - - + + + + - - + + @@ -137290,17 +137418,17 @@ - - - + + + - - + + - - + + @@ -137321,8 +137449,8 @@ - - + + @@ -137377,11 +137505,11 @@ - - - - - + + + + + @@ -137403,17 +137531,17 @@ - - + + - - + + - - - + + + @@ -137446,8 +137574,8 @@ - - + + @@ -137457,21 +137585,21 @@ - - + + - - - + + + - - + + - - + + @@ -137497,12 +137625,12 @@ - - - - - - + + + + + + @@ -137546,10 +137674,10 @@ - - - - + + + + @@ -138381,50 +138509,50 @@ - - + + - - + + - - + + - - + + - - + + - - + + - - - + + + - - + + - - + + - - - + + + @@ -138578,7 +138706,7 @@ - + @@ -138625,36 +138753,36 @@ - - - - - + + + + + - - - - - - + + + + + + - - - - - - - + + + + + + + - - - - - - + + + + + + @@ -138689,37 +138817,37 @@ - - - + + + - - - - - + + + + + - - + + - - - + + + - - - - + + + + - - - - + + + + @@ -138811,8 +138939,8 @@ - - + + @@ -139054,7 +139182,7 @@ - + @@ -139103,9 +139231,9 @@ - - - + + + @@ -139121,20 +139249,20 @@ - - + + - - + + - - + + - - + + @@ -139290,12 +139418,12 @@ - - + + - - + + @@ -139367,13 +139495,13 @@ - - - - - - - + + + + + + + @@ -139406,9 +139534,9 @@ - - - + + + @@ -139433,13 +139561,13 @@ - - + + - - - + + + @@ -139513,29 +139641,29 @@ - - - + + + - - - + + + - - - + + + - - - - + + + + - - + + @@ -139543,90 +139671,90 @@ - - + + - - - + + + - - + + - - + + - - - + + + - - + + - - + + - - + + - - + + - - - + + + - - + + - - + + - - + + - - - - + + + + - - - + + + @@ -139668,24 +139796,24 @@ - - - - + + + + - - + + - - - + + + - - - + + + @@ -139693,10 +139821,10 @@ - - - - + + + + @@ -139705,24 +139833,24 @@ - - - + + + - - - + + + - - - - + + + + @@ -139730,34 +139858,34 @@ - - - - + + + + - - - - + + + + - - - + + + - - + + - - + + - - - + + + @@ -139787,8 +139915,8 @@ - - + + @@ -139797,14 +139925,14 @@ - - - + + + - - - + + + @@ -139895,8 +140023,8 @@ - - + + @@ -140051,6 +140179,9 @@ + + + @@ -140283,7 +140414,7 @@ - + @@ -140638,10 +140769,10 @@ - - - - + + + + @@ -141265,53 +141396,53 @@ - - - - + + + + - - - - - + + + + + - - - + + + - - - - + + + + - - + + - - + + - - + + - - - + + + - - + + - - + + @@ -141322,17 +141453,17 @@ - - + + - - + + - - - + + + @@ -141567,14 +141698,14 @@ - - + + - - - - + + + + @@ -141649,11 +141780,11 @@ - + - - + + @@ -142743,143 +142874,143 @@ - - - - + + + + - - - - + + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - + + - - + + - - - - - + + + + + - - - - - - + + + + + + - - - + + + - - - - + + + + - - + + - - - + + + - - - + + + - - - + + + - - + + - - + + - - - - + + + + @@ -143514,9 +143645,9 @@ - - - + + + @@ -143561,9 +143692,9 @@ - - - + + + @@ -143792,9 +143923,9 @@ - - - + + + @@ -143845,20 +143976,20 @@ - - - + + + - - + + - + @@ -143870,7 +144001,7 @@ - + @@ -144097,11 +144228,11 @@ - - - - - + + + + + @@ -144135,12 +144266,12 @@ - - - - - - + + + + + + @@ -144349,12 +144480,12 @@ - - + + - - + + @@ -144424,8 +144555,8 @@ - - + + @@ -144615,9 +144746,9 @@ - - - + + + @@ -144874,19 +145005,19 @@ - - + + - - - - - + + + + + @@ -145038,30 +145169,30 @@ - - + + - - + + - - + + - - - + + + - - - + + + - - + + @@ -145073,9 +145204,9 @@ - - - + + + @@ -145083,9 +145214,9 @@ - - - + + + @@ -145093,9 +145224,9 @@ - - - + + + @@ -145113,8 +145244,8 @@ - - + + @@ -145122,8 +145253,8 @@ - - + + @@ -145134,111 +145265,111 @@ - - + + - - - + + + - - - + + + - - - - - + + + + + - - - - - - + + + + + + - - - + + + - - + + - - - + + + - - - + + + - - - - - - + + + + + + - - - - - - + + + + + + - - + + - - - + + + - - - + + + - - - - - - - + + + + + + + - - - + + + - - - + + + - - + + - - - - + + + + @@ -145294,48 +145425,48 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - + + - - - + + + - - - - - + + + + + - - - - - - + + + + + + @@ -145345,162 +145476,162 @@ - - - + + + - - + + - - - - + + + + - - + + - - + + - - - + + + - - + + - - + + - - - - + + + + - - - + + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - - + + + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - - + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - + + + + @@ -145597,21 +145728,21 @@ - - + + - - + + - - - + + + @@ -146174,8 +146305,8 @@ - - + + @@ -146331,8 +146462,8 @@ - - + + @@ -147881,16 +148012,16 @@ - + - - - - - - - + + + + + + + @@ -148014,11 +148145,11 @@ - - - - - + + + + + @@ -148051,16 +148182,16 @@ - - - - + + + + - - - - + + + + @@ -148112,7 +148243,7 @@ - + @@ -148259,9 +148390,9 @@ - - - + + + @@ -148366,12 +148497,12 @@ - - - - - - + + + + + + @@ -148379,33 +148510,33 @@ - - - - + + + + - - - - - - - - - - + + + + + + + + + + - - - - + + + + - - - + + + @@ -148429,19 +148560,19 @@ - - - + + + - - - - + + + + @@ -148454,13 +148585,13 @@ - - - + + + - - + + @@ -148478,9 +148609,9 @@ - - - + + + @@ -148503,14 +148634,14 @@ - - - + + + - - - + + + @@ -148540,9 +148671,9 @@ - - - + + + @@ -148555,8 +148686,8 @@ - - + + @@ -148595,11 +148726,11 @@ - - - - - + + + + + diff --git a/android/abi_gki_aarch64_fips140 b/android/abi_gki_aarch64_fips140 index faea593f3b77..68a850db7f58 100644 --- a/android/abi_gki_aarch64_fips140 +++ b/android/abi_gki_aarch64_fips140 @@ -112,6 +112,7 @@ _raw_spin_lock _raw_spin_unlock refcount_warn_saturate + rng_is_initialized scatterwalk_ffwd scatterwalk_map_and_copy sg_init_one diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h index b565cc6f408e..f89798da8a14 100644 --- a/arch/alpha/include/asm/timex.h +++ b/arch/alpha/include/asm/timex.h @@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void) __asm__ __volatile__ ("rpcc %0" : "=r"(ret)); return ret; } +#define get_cycles get_cycles #endif diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 438b10c44d73..2b7a314b8452 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -59,7 +59,7 @@ srmcons_do_receive_chars(struct tty_port *port) } while((result.bits.status & 1) && (++loops < 10)); if (count) - tty_schedule_flip(port); + tty_flip_buffer_push(port); return count; } diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index a362714ae9fc..1ef89dd55d92 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -117,11 +117,6 @@ pinctrl_fwspid_default: fwspid_default { groups = "FWSPID"; }; - pinctrl_fwqspid_default: fwqspid_default { - function = "FWSPID"; - groups = "FWQSPID"; - }; - pinctrl_fwspiwp_default: fwspiwp_default { function = "FWSPIWP"; groups = "FWSPIWP"; @@ -653,12 +648,12 @@ pinctrl_pwm9g1_default: pwm9g1_default { }; pinctrl_qspi1_default: qspi1_default { - function = "QSPI1"; + function = "SPI1"; groups = "QSPI1"; }; pinctrl_qspi2_default: qspi2_default { - function = "QSPI2"; + function = "SPI2"; groups = "QSPI2"; }; diff --git a/arch/arm/boot/dts/at91-sam9x60ek.dts b/arch/arm/boot/dts/at91-sam9x60ek.dts index b1068cca4228..fd8dc1183b3e 100644 --- a/arch/arm/boot/dts/at91-sam9x60ek.dts +++ b/arch/arm/boot/dts/at91-sam9x60ek.dts @@ -233,10 +233,9 @@ i2c0: i2c@600 { status = "okay"; eeprom@53 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x53>; pagesize = <16>; - size = <128>; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/at91-sama5d2_icp.dts b/arch/arm/boot/dts/at91-sama5d2_icp.dts index 308d472bd104..634411d13b4a 100644 --- a/arch/arm/boot/dts/at91-sama5d2_icp.dts +++ b/arch/arm/boot/dts/at91-sama5d2_icp.dts @@ -317,21 +317,21 @@ &i2c1 { status = "okay"; eeprom@50 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x50>; pagesize = <16>; status = "okay"; }; eeprom@52 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x52>; pagesize = <16>; status = "disabled"; }; eeprom@53 { - compatible = "atmel,24c32"; + compatible = "atmel,24c02"; reg = <0x53>; pagesize = <16>; status = "disabled"; diff --git a/arch/arm/boot/dts/bcm2835-rpi-b.dts b/arch/arm/boot/dts/bcm2835-rpi-b.dts index 1b63d6b19750..25d87212cefd 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-b.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-b.dts @@ -53,18 +53,17 @@ &gpio { "GPIO18", "NC", /* GPIO19 */ "NC", /* GPIO20 */ - "GPIO21", + "CAM_GPIO0", "GPIO22", "GPIO23", "GPIO24", "GPIO25", "NC", /* GPIO26 */ - "CAM_GPIO0", - /* Binary number representing build/revision */ - "CONFIG0", - "CONFIG1", - "CONFIG2", - "CONFIG3", + "GPIO27", + "GPIO28", + "GPIO29", + "GPIO30", + "GPIO31", "NC", /* GPIO32 */ "NC", /* GPIO33 */ "NC", /* GPIO34 */ diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index 33b2b77aa47d..00582eb2c12e 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -74,16 +74,18 @@ &gpio { "GPIO27", "SDA0", "SCL0", - "NC", /* GPIO30 */ - "NC", /* GPIO31 */ - "NC", /* GPIO32 */ - "NC", /* GPIO33 */ - "NC", /* GPIO34 */ - "NC", /* GPIO35 */ - "NC", /* GPIO36 */ - "NC", /* GPIO37 */ - "NC", /* GPIO38 */ - "NC", /* GPIO39 */ + /* Used by BT module */ + "CTS0", + "RTS0", + "TXD0", + "RXD0", + /* Used by Wifi */ + "SD1_CLK", + "SD1_CMD", + "SD1_DATA0", + "SD1_DATA1", + "SD1_DATA2", + "SD1_DATA3", "CAM_GPIO1", /* GPIO40 */ "WL_ON", /* GPIO41 */ "NC", /* GPIO42 */ diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts index 61010266ca9a..90472e76a313 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts @@ -45,7 +45,7 @@ expgpio: gpio { #gpio-cells = <2>; gpio-line-names = "BT_ON", "WL_ON", - "STATUS_LED_R", + "PWR_LED_R", "LAN_RUN", "", "CAM_GPIO0", diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts index 588d9411ceb6..3dfce4312dfc 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts @@ -63,8 +63,8 @@ &gpio { "GPIO43", "GPIO44", "GPIO45", - "GPIO46", - "GPIO47", + "SMPS_SCL", + "SMPS_SDA", /* Used by eMMC */ "SD_CLK_R", "SD_CMD_R", diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts index 572198b6834e..06c4e0996503 100644 --- a/arch/arm/boot/dts/exynos5250-smdk5250.dts +++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts @@ -129,7 +129,7 @@ &i2c_0 { samsung,i2c-max-bus-freq = <20000>; eeprom@50 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x50>; }; @@ -289,7 +289,7 @@ &i2c_1 { samsung,i2c-max-bus-freq = <20000>; eeprom@51 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x51>; }; diff --git a/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts b/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts index b4a9523e325b..864dc5018451 100644 --- a/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts +++ b/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts @@ -297,7 +297,11 @@ &fec { phy-mode = "rmii"; phy-reset-gpios = <&gpio1 18 GPIO_ACTIVE_LOW>; phy-handle = <&phy>; - clocks = <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET>, <&rmii_clk>; + clocks = <&clks IMX6QDL_CLK_ENET>, + <&clks IMX6QDL_CLK_ENET>, + <&rmii_clk>, + <&clks IMX6QDL_CLK_ENET_REF>; + clock-names = "ipg", "ahb", "ptp", "enet_out"; status = "okay"; mdio { diff --git a/arch/arm/boot/dts/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/imx6qdl-colibri.dtsi index 4e2a309c93fa..1e86b3814708 100644 --- a/arch/arm/boot/dts/imx6qdl-colibri.dtsi +++ b/arch/arm/boot/dts/imx6qdl-colibri.dtsi @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ OR MIT /* - * Copyright 2014-2020 Toradex + * Copyright 2014-2022 Toradex * Copyright 2012 Freescale Semiconductor, Inc. * Copyright 2011 Linaro Ltd. */ @@ -132,7 +132,7 @@ &i2c2 { clock-frequency = <100000>; pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c2>; - pinctrl-0 = <&pinctrl_i2c2_gpio>; + pinctrl-1 = <&pinctrl_i2c2_gpio>; scl-gpios = <&gpio2 30 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; sda-gpios = <&gpio3 16 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; status = "okay"; @@ -488,7 +488,7 @@ MX6QDL_PAD_EIM_D16__I2C2_SDA 0x4001b8b1 >; }; - pinctrl_i2c2_gpio: i2c2grp { + pinctrl_i2c2_gpio: i2c2gpiogrp { fsl,pins = < MX6QDL_PAD_EIM_EB2__GPIO2_IO30 0x4001b8b1 MX6QDL_PAD_EIM_D16__GPIO3_IO16 0x4001b8b1 diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi index e6aa0c33754d..966038ecc5bf 100644 --- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi +++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi @@ -226,7 +226,7 @@ gpio8: gpio@28 { reg = <0x28>; #gpio-cells = <2>; gpio-controller; - ngpio = <32>; + ngpios = <62>; }; sgtl5000: codec@a { diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 7a8837cbe21b..7858ae5d39df 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -756,7 +756,7 @@ reg_pu: regulator-vddpu { regulator-name = "vddpu"; regulator-min-microvolt = <725000>; regulator-max-microvolt = <1450000>; - regulator-enable-ramp-delay = <150>; + regulator-enable-ramp-delay = <380>; anatop-reg-offset = <0x140>; anatop-vol-bit-shift = <9>; anatop-vol-bit-width = <5>; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 84d9cc13afb9..9e1b0af0aa43 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -102,6 +102,7 @@ usbphynop3: usbphynop3 { compatible = "usb-nop-xceiv"; clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>; clock-names = "main_clk"; + power-domains = <&pgc_hsic_phy>; #phy-cells = <0>; }; @@ -1104,7 +1105,6 @@ usbh: usb@30b30000 { compatible = "fsl,imx7d-usb", "fsl,imx27-usb"; reg = <0x30b30000 0x200>; interrupts = ; - power-domains = <&pgc_hsic_phy>; clocks = <&clks IMX7D_USB_CTRL_CLK>; fsl,usbphy = <&usbphynop3>; fsl,usbmisc = <&usbmisc3 0>; diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi index 90846a7655b4..dde4364892bf 100644 --- a/arch/arm/boot/dts/ox820.dtsi +++ b/arch/arm/boot/dts/ox820.dtsi @@ -287,7 +287,7 @@ local-timer@600 { clocks = <&armclk>; }; - gic: gic@1000 { + gic: interrupt-controller@1000 { compatible = "arm,arm11mp-gic"; interrupt-controller; #interrupt-cells = <3>; diff --git a/arch/arm/boot/dts/s5pv210-aries.dtsi b/arch/arm/boot/dts/s5pv210-aries.dtsi index bd4450dbdcb6..9005f0a23e8f 100644 --- a/arch/arm/boot/dts/s5pv210-aries.dtsi +++ b/arch/arm/boot/dts/s5pv210-aries.dtsi @@ -564,7 +564,6 @@ panel@0 { reset-gpios = <&mp05 5 GPIO_ACTIVE_LOW>; vdd3-supply = <&ldo7_reg>; vci-supply = <&ldo17_reg>; - spi-cs-high; spi-max-frequency = <1200000>; pinctrl-names = "default"; @@ -637,7 +636,7 @@ touchscreen@4a { }; &i2s0 { - dmas = <&pdma0 9>, <&pdma0 10>, <&pdma0 11>; + dmas = <&pdma0 10>, <&pdma0 9>, <&pdma0 11>; status = "okay"; }; @@ -896,7 +895,7 @@ bluetooth { device-wakeup-gpios = <&gpg3 4 GPIO_ACTIVE_HIGH>; interrupt-parent = <&gph2>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "host-wake"; + interrupt-names = "host-wakeup"; }; }; diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 2871351ab907..eb7e3660ada7 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -240,8 +240,8 @@ i2s0: i2s@eee30000 { reg = <0xeee30000 0x1000>; interrupt-parent = <&vic2>; interrupts = <16>; - dma-names = "rx", "tx", "tx-sec"; - dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; + dma-names = "tx", "rx", "tx-sec"; + dmas = <&pdma1 10>, <&pdma1 9>, <&pdma1 11>; clock-names = "iis", "i2s_opclk0", "i2s_opclk1"; @@ -260,8 +260,8 @@ i2s1: i2s@e2100000 { reg = <0xe2100000 0x1000>; interrupt-parent = <&vic2>; interrupts = <17>; - dma-names = "rx", "tx"; - dmas = <&pdma1 12>, <&pdma1 13>; + dma-names = "tx", "rx"; + dmas = <&pdma1 13>, <&pdma1 12>; clock-names = "iis", "i2s_opclk0"; clocks = <&clocks CLK_I2S1>, <&clocks SCLK_AUDIO1>; pinctrl-names = "default"; @@ -275,8 +275,8 @@ i2s2: i2s@e2a00000 { reg = <0xe2a00000 0x1000>; interrupt-parent = <&vic2>; interrupts = <18>; - dma-names = "rx", "tx"; - dmas = <&pdma1 14>, <&pdma1 15>; + dma-names = "tx", "rx"; + dmas = <&pdma1 15>, <&pdma1 14>; clock-names = "iis", "i2s_opclk0"; clocks = <&clocks CLK_I2S2>, <&clocks SCLK_AUDIO2>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 12f57278ba4a..33f76d14341e 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -1125,7 +1125,7 @@ AT91_XDMAC_DT_PERID(33))>, clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>; clock-names = "pclk", "gclk"; assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>; - assigned-parrents = <&pmc PMC_TYPE_GCK 55>; + assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index 7a0ef01de969..9919fc86bdc3 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -543,7 +543,7 @@ cec: cec@40016000 { compatible = "st,stm32-cec"; reg = <0x40016000 0x400>; interrupts = ; - clocks = <&rcc CEC_K>, <&clk_lse>; + clocks = <&rcc CEC_K>, <&rcc CEC>; clock-names = "cec", "hdmi-cec"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi index 944d38b85eef..f3e0c790a4b1 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi @@ -141,6 +141,7 @@ mdio0 { compatible = "snps,dwmac-mdio"; reset-gpios = <&gpioz 2 GPIO_ACTIVE_LOW>; reset-delay-us = <1000>; + reset-post-delay-us = <1000>; phy0: ethernet-phy@7 { reg = <7>; diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index f19ed981da9d..3706216ffb40 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -169,7 +169,7 @@ &spi0 { flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mxicy,mx25l1606e", "winbond,w25q128"; + compatible = "mxicy,mx25l1606e", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <40000000>; }; diff --git a/arch/arm/boot/dts/suniv-f1c100s.dtsi b/arch/arm/boot/dts/suniv-f1c100s.dtsi index 6100d3b75f61..def830101448 100644 --- a/arch/arm/boot/dts/suniv-f1c100s.dtsi +++ b/arch/arm/boot/dts/suniv-f1c100s.dtsi @@ -104,8 +104,10 @@ timer@1c20c00 { wdt: watchdog@1c20ca0 { compatible = "allwinner,suniv-f1c100s-wdt", - "allwinner,sun4i-a10-wdt"; + "allwinner,sun6i-a31-wdt"; reg = <0x01c20ca0 0x20>; + interrupts = <16>; + clocks = <&osc32k>; }; uart0: serial@1c25000 { diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 8f26c454ea12..1615597a0438 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o +obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o @@ -31,7 +32,8 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) -blake2s-arm-y := blake2s-core.o blake2s-glue.o +blake2s-arm-y := blake2s-shash.o +libblake2s-arm-y:= blake2s-core.o blake2s-glue.o blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/crypto/blake2s-core.S index 86345751bbf3..df40e46601f1 100644 --- a/arch/arm/crypto/blake2s-core.S +++ b/arch/arm/crypto/blake2s-core.S @@ -167,8 +167,8 @@ .endm // -// void blake2s_compress_arch(struct blake2s_state *state, -// const u8 *block, size_t nblocks, u32 inc); +// void blake2s_compress(struct blake2s_state *state, +// const u8 *block, size_t nblocks, u32 inc); // // Only the first three fields of struct blake2s_state are used: // u32 h[8]; (inout) @@ -176,7 +176,7 @@ // u32 f[2]; (in) // .align 5 -ENTRY(blake2s_compress_arch) +ENTRY(blake2s_compress) push {r0-r2,r4-r11,lr} // keep this an even number .Lnext_block: @@ -303,4 +303,4 @@ ENTRY(blake2s_compress_arch) str r3, [r12], #4 bne 1b b .Lcopy_block_done -ENDPROC(blake2s_compress_arch) +ENDPROC(blake2s_compress) diff --git a/arch/arm/crypto/blake2s-glue.c b/arch/arm/crypto/blake2s-glue.c index f2cc1e5fc9ec..0238a70d9581 100644 --- a/arch/arm/crypto/blake2s-glue.c +++ b/arch/arm/crypto/blake2s-glue.c @@ -1,78 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* - * BLAKE2s digest algorithm, ARM scalar implementation - * - * Copyright 2020 Google LLC - */ #include -#include - #include /* defined in blake2s-core.S */ -EXPORT_SYMBOL(blake2s_compress_arch); - -static int crypto_blake2s_update_arm(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch); -} - -static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) -{ - return crypto_blake2s_final(desc, out, blake2s_compress_arch); -} - -#define BLAKE2S_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2s_setkey, \ - .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update_arm, \ - .final = crypto_blake2s_final_arm, \ - .descsize = sizeof(struct blake2s_state), \ - } - -static struct shash_alg blake2s_arm_algs[] = { - BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE), - BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE), - BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE), - BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE), -}; - -static int __init blake2s_arm_mod_init(void) -{ - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? - crypto_register_shashes(blake2s_arm_algs, - ARRAY_SIZE(blake2s_arm_algs)) : 0; -} - -static void __exit blake2s_arm_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) - crypto_unregister_shashes(blake2s_arm_algs, - ARRAY_SIZE(blake2s_arm_algs)); -} - -module_init(blake2s_arm_mod_init); -module_exit(blake2s_arm_mod_exit); - -MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers "); -MODULE_ALIAS_CRYPTO("blake2s-128"); -MODULE_ALIAS_CRYPTO("blake2s-128-arm"); -MODULE_ALIAS_CRYPTO("blake2s-160"); -MODULE_ALIAS_CRYPTO("blake2s-160-arm"); -MODULE_ALIAS_CRYPTO("blake2s-224"); -MODULE_ALIAS_CRYPTO("blake2s-224-arm"); -MODULE_ALIAS_CRYPTO("blake2s-256"); -MODULE_ALIAS_CRYPTO("blake2s-256-arm"); +EXPORT_SYMBOL(blake2s_compress); diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c new file mode 100644 index 000000000000..763c73beea2d --- /dev/null +++ b/arch/arm/crypto/blake2s-shash.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BLAKE2s digest algorithm, ARM scalar implementation + * + * Copyright 2020 Google LLC + */ + +#include +#include + +#include + +static int crypto_blake2s_update_arm(struct shash_desc *desc, + const u8 *in, unsigned int inlen) +{ + return crypto_blake2s_update(desc, in, inlen, false); +} + +static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) +{ + return crypto_blake2s_final(desc, out, false); +} + +#define BLAKE2S_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 200, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2s_setkey, \ + .init = crypto_blake2s_init, \ + .update = crypto_blake2s_update_arm, \ + .final = crypto_blake2s_final_arm, \ + .descsize = sizeof(struct blake2s_state), \ + } + +static struct shash_alg blake2s_arm_algs[] = { + BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE), + BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE), + BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE), + BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE), +}; + +static int __init blake2s_arm_mod_init(void) +{ + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? + crypto_register_shashes(blake2s_arm_algs, + ARRAY_SIZE(blake2s_arm_algs)) : 0; +} + +static void __exit blake2s_arm_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) + crypto_unregister_shashes(blake2s_arm_algs, + ARRAY_SIZE(blake2s_arm_algs)); +} + +module_init(blake2s_arm_mod_init); +module_exit(blake2s_arm_mod_exit); + +MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Biggers "); +MODULE_ALIAS_CRYPTO("blake2s-128"); +MODULE_ALIAS_CRYPTO("blake2s-128-arm"); +MODULE_ALIAS_CRYPTO("blake2s-160"); +MODULE_ALIAS_CRYPTO("blake2s-160-arm"); +MODULE_ALIAS_CRYPTO("blake2s-224"); +MODULE_ALIAS_CRYPTO("blake2s-224-arm"); +MODULE_ALIAS_CRYPTO("blake2s-256"); +MODULE_ALIAS_CRYPTO("blake2s-256-arm"); diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h index a81dda65c576..45180a2cc47c 100644 --- a/arch/arm/include/asm/dma.h +++ b/arch/arm/include/asm/dma.h @@ -10,7 +10,7 @@ #else #define MAX_DMA_ADDRESS ({ \ extern phys_addr_t arm_dma_zone_size; \ - arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \ + arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \ (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; }) #endif diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index 92282558caf7..2b8970d8e5a2 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -27,6 +27,7 @@ enum { MT_HIGH_VECTORS, MT_MEMORY_RWX, MT_MEMORY_RW, + MT_MEMORY_RO, MT_ROM, MT_MEMORY_RWX_NONCACHED, MT_MEMORY_RW_DTCM, diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 91d6b7856be4..73c83f4d33b3 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -164,5 +164,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) + +/* + * Update ITSTATE after normal execution of an IT block instruction. + * + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + */ +static inline unsigned long it_advance(unsigned long cpsr) +{ + if ((cpsr & 0x06000400) == 0) { + /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ + cpsr &= ~PSR_IT_MASK; + } else { + /* We need to shift left ITSTATE<4:0> */ + const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ + unsigned long it = cpsr & mask; + it <<= 1; + it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ + it &= mask; + cpsr &= ~mask; + cpsr |= it; + } + return cpsr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 7c3b3671d6c2..6d1337c169cd 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h @@ -11,5 +11,6 @@ typedef unsigned long cycles_t; #define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; }) +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c3ebe3584103..030351d169aa 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1043,7 +1043,7 @@ vector_bhb_loop8_\name: @ bhb workaround mov r0, #8 -3: b . + 4 +3: W(b) . + 4 subs r0, r0, #1 bne 3b dsb diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index db798eac7431..824774999825 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -53,17 +53,17 @@ int notrace unwind_frame(struct stackframe *frame) return -EINVAL; frame->sp = frame->fp; - frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); #else /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) return -EINVAL; /* restore the registers from the stack frame */ - frame->fp = *(unsigned long *)(fp - 12); - frame->sp = *(unsigned long *)(fp - 8); - frame->pc = *(unsigned long *)(fp - 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12)); + frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4)); #endif return 0; diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index b99dd8e1c93f..7ba6cf826162 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -26,8 +26,9 @@ MODULE_LICENSE("GPL"); * While older versions of GCC do not generate incorrect code, they fail to * recognize the parallel nature of these functions, and emit plain ARM code, * which is known to be slower than the optimized ARM code in asm-arm/xor.h. + * + * #warning This code requires at least version 4.6 of GCC */ -#warning This code requires at least version 4.6 of GCC #endif #pragma GCC diagnostic ignored "-Wunused-variable" diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 3f015cb6ec2b..f2ce2d094925 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -104,7 +104,7 @@ static const struct wakeup_source_info ws_info[] = { static const struct of_device_id sama5d2_ws_ids[] = { { .compatible = "atmel,sama5d2-gem", .data = &ws_info[0] }, - { .compatible = "atmel,at91rm9200-rtc", .data = &ws_info[1] }, + { .compatible = "atmel,sama5d2-rtc", .data = &ws_info[1] }, { .compatible = "atmel,sama5d3-udc", .data = &ws_info[2] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, @@ -115,12 +115,12 @@ static const struct of_device_id sama5d2_ws_ids[] = { }; static const struct of_device_id sam9x60_ws_ids[] = { - { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] }, + { .compatible = "microchip,sam9x60-rtc", .data = &ws_info[1] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] }, { .compatible = "usb-ehci", .data = &ws_info[2] }, - { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] }, + { .compatible = "microchip,sam9x60-rtt", .data = &ws_info[4] }, { .compatible = "cdns,sam9x60-macb", .data = &ws_info[5] }, { /* sentinel */ } }; diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c index 512943eae30a..2e203626eda5 100644 --- a/arch/arm/mach-axxia/platsmp.c +++ b/arch/arm/mach-axxia/platsmp.c @@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle) return -ENOENT; syscon = of_iomap(syscon_np, 0); + of_node_put(syscon_np); if (!syscon) return -ENOMEM; diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index e4f4b20b83a2..3fc4ec830e3a 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -372,6 +372,7 @@ static void __init cns3xxx_init(void) /* De-Asscer SATA Reset */ cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA)); } + of_node_put(dn); dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci"); if (of_device_is_available(dn)) { @@ -385,6 +386,7 @@ static void __init cns3xxx_init(void) cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO)); cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO)); } + of_node_put(dn); pm_power_off = cns3xxx_power_off; diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 83d1d1327f96..1276585f72c5 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -149,6 +149,7 @@ static void exynos_map_pmu(void) np = of_find_matching_node(NULL, exynos_dt_pmu_match); if (np) pmu_base_addr = of_iomap(np, 0); + of_node_put(np); } static void __init exynos_init_irq(void) diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index da7a09c1dae5..1cd1d9b0aabf 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -67,14 +67,17 @@ static void __init hi3xxx_smp_prepare_cpus(unsigned int max_cpus) } ctrl_base = of_iomap(np, 0); if (!ctrl_base) { + of_node_put(np); pr_err("failed to map address\n"); return; } if (of_property_read_u32(np, "smp-offset", &offset) < 0) { + of_node_put(np); pr_err("failed to find smp-offset property\n"); return; } ctrl_base += offset; + of_node_put(np); } } @@ -160,6 +163,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) if (WARN_ON(!node)) return -1; ctrl_base = of_iomap(node, 0); + of_node_put(node); /* set the secondary core boot from DDR */ remap_reg_value = readl_relaxed(ctrl_base + REG_SC_CTRL); diff --git a/arch/arm/mach-mediatek/Kconfig b/arch/arm/mach-mediatek/Kconfig index 9e0f592d87d8..35a3430c7942 100644 --- a/arch/arm/mach-mediatek/Kconfig +++ b/arch/arm/mach-mediatek/Kconfig @@ -30,6 +30,7 @@ config MACH_MT7623 config MACH_MT7629 bool "MediaTek MT7629 SoCs support" default ARCH_MEDIATEK + select HAVE_ARM_ARCH_TIMER config MACH_MT8127 bool "MediaTek MT8127 SoCs support" diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c index 4b8ad728bb42..32ac60b89fdc 100644 --- a/arch/arm/mach-meson/platsmp.c +++ b/arch/arm/mach-meson/platsmp.c @@ -71,6 +71,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } sram_base = of_iomap(node, 0); + of_node_put(node); if (!sram_base) { pr_err("Couldn't map SRAM registers\n"); return; @@ -91,6 +92,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } scu_base = of_iomap(node, 0); + of_node_put(node); if (!scu_base) { pr_err("Couldn't map SCU registers\n"); return; diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index bd5be82101f3..d89bda12bf3c 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(clockfw_lock); unsigned long omap1_uart_recalc(struct clk *clk) { unsigned int val = __raw_readl(clk->enable_reg); - return val & clk->enable_bit ? 48000000 : 12000000; + return val & 1 << clk->enable_bit ? 48000000 : 12000000; } unsigned long omap1_sossi_recalc(struct clk *clk) diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 2e35354b61f5..167e871f059e 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -354,13 +354,13 @@ static struct platform_device cm_x300_spi_gpio = { static struct gpiod_lookup_table cm_x300_spi_gpiod_table = { .dev_id = "spi_gpio", .table = { - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_SCL, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_SCL - GPIO_LCD_BASE, "sck", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DIN, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_DIN - GPIO_LCD_BASE, "mosi", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DOUT, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_DOUT - GPIO_LCD_BASE, "miso", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_CS, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_CS - GPIO_LCD_BASE, "cs", GPIO_ACTIVE_HIGH), { }, }, diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index cd9fa465b9b2..9aee8e0f2bb1 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -681,7 +681,7 @@ static struct platform_device bq24022 = { static struct gpiod_lookup_table bq24022_gpiod_table = { .dev_id = "gpio-regulator", .table = { - GPIO_LOOKUP("gpio-pxa", EGPIO_MAGICIAN_BQ24022_ISET2, + GPIO_LOOKUP("htc-egpio-0", EGPIO_MAGICIAN_BQ24022_ISET2 - MAGICIAN_EGPIO_BASE, NULL, GPIO_ACTIVE_HIGH), GPIO_LOOKUP("gpio-pxa", GPIO30_MAGICIAN_BQ24022_nCHARGE_EN, "enable", GPIO_ACTIVE_LOW), diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index 431709725d02..ded5e343e198 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -296,9 +296,9 @@ static struct gpiod_lookup_table tosa_mci_gpio_table = { .table = { GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_nSD_DETECT, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_SD_WP, + GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_SD_WP - TOSA_SCOOP_GPIO_BASE, "wp", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_PWR_ON, + GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_PWR_ON - TOSA_SCOOP_GPIO_BASE, "power", GPIO_ACTIVE_HIGH), { }, }, diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index a0554d7d04f7..e1adc098f89a 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -144,6 +144,7 @@ static int __init dcscb_init(void) if (!node) return -ENODEV; dcscb_base = of_iomap(node, 0); + of_node_put(node); if (!dcscb_base) return -EADDRNOTAVAIL; cfg = readl_relaxed(dcscb_base + DCS_CFG_R); diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index ea81e89e7740..bcefe3f51744 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); + if (thumb_mode(regs)) + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + return 0; bad_or_fault: diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 3e3001998460..86f213f1b44b 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -296,6 +296,13 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_RO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, [MT_ROM] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, @@ -490,6 +497,7 @@ static void __init build_mem_type_table(void) /* Also setup NX memory mapping */ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -569,6 +577,7 @@ static void __init build_mem_type_table(void) mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif /* @@ -588,6 +597,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; @@ -648,6 +659,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -1342,7 +1355,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); map.virtual = FDT_FIXED_BASE; map.length = FDT_FIXED_SIZE; - map.type = MT_ROM; + map.type = MT_MEMORY_RO; create_mapping(&map); } diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 06dbfb968182..8bc7a2d6d6c7 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -108,8 +108,7 @@ static unsigned int spectre_v2_install_workaround(unsigned int method) #else static unsigned int spectre_v2_install_workaround(unsigned int method) { - pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", - smp_processor_id()); + pr_info_once("Spectre V2: workarounds disabled by configuration\n"); return SPECTRE_VULNERABLE; } @@ -209,10 +208,10 @@ static int spectre_bhb_install_workaround(int method) return SPECTRE_VULNERABLE; spectre_bhb_method = method; - } - pr_info("CPU%u: Spectre BHB: using %s workaround\n", - smp_processor_id(), spectre_bhb_method_name(method)); + pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n", + smp_processor_id(), spectre_bhb_method_name(method)); + } return SPECTRE_MITIGATED; } @@ -288,6 +287,7 @@ void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } void cpu_v7_bugs_init(void) diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 973173598992..facc889d05ee 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -14,6 +14,7 @@ #include #include #include +#include #include void __init arm_probes_decode_init(void); @@ -35,31 +36,6 @@ void __init find_str_pc_offset(void); #endif -/* - * Update ITSTATE after normal execution of an IT block instruction. - * - * The 8 IT state bits are split into two parts in CPSR: - * ITSTATE<1:0> are in CPSR<26:25> - * ITSTATE<7:2> are in CPSR<15:10> - */ -static inline unsigned long it_advance(unsigned long cpsr) - { - if ((cpsr & 0x06000400) == 0) { - /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ - cpsr &= ~PSR_IT_MASK; - } else { - /* We need to shift left ITSTATE<4:0> */ - const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ - unsigned long it = cpsr & mask; - it <<= 1; - it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ - it &= mask; - cpsr &= ~mask; - cpsr |= it; - } - return cpsr; -} - static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs) { long cpsr = regs->ARM_cpsr; diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index acb464547a54..4a1991a103ea 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -62,11 +62,12 @@ static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new) unsigned long __pfn_to_mfn(unsigned long pfn) { - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; struct xen_p2m_entry *entry; unsigned long irqflags; read_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (entry->pfn <= pfn && @@ -153,10 +154,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn, int rc; unsigned long irqflags; struct xen_p2m_entry *p2m_entry; - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; if (mfn == INVALID_P2M_ENTRY) { write_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (p2m_entry->pfn <= pfn && diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 10d05685da29..e6a2f21ba3f8 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -244,6 +244,7 @@ config ARCH_STRATIX10 config ARCH_SYNQUACER bool "Socionext SynQuacer SoC Family" + select IRQ_FASTEOI_HIERARCHY_HANDLERS config ARCH_TEGRA bool "NVIDIA Tegra SoC Family" diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi index d6b9dedd168f..5667009aae13 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi @@ -167,6 +167,7 @@ &uart3 { pinctrl-0 = <&pinctrl_uart3>; assigned-clocks = <&clk IMX8MM_CLK_UART3>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_80M>; + uart-has-rtscts; status = "okay"; }; @@ -237,6 +238,8 @@ pinctrl_uart3: uart3grp { fsl,pins = < MX8MM_IOMUXC_ECSPI1_SCLK_UART3_DCE_RX 0x40 MX8MM_IOMUXC_ECSPI1_MOSI_UART3_DCE_TX 0x40 + MX8MM_IOMUXC_ECSPI1_MISO_UART3_DCE_CTS_B 0x40 + MX8MM_IOMUXC_ECSPI1_SS0_UART3_DCE_RTS_B 0x40 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index c13b4a02d12f..c016f5b7d24a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -148,27 +148,27 @@ MX8MP_IOMUXC_SAI1_RXD0__GPIO4_IO02 0x19 pinctrl_gpio_led: gpioledgrp { fsl,pins = < - MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x19 + MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x140 >; }; pinctrl_i2c3: i2c3grp { fsl,pins = < - MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c3 - MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c3 + MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c2 + MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c2 >; }; pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp { fsl,pins = < - MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41 + MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40 >; }; pinctrl_uart2: uart2grp { fsl,pins = < - MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x49 - MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x49 + MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x140 + MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x140 >; }; @@ -180,7 +180,7 @@ MX8MP_IOMUXC_SD2_DATA0__USDHC2_DATA0 0x1d0 MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -192,7 +192,7 @@ MX8MP_IOMUXC_SD2_DATA0__USDHC2_DATA0 0x1d4 MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; @@ -204,7 +204,7 @@ MX8MP_IOMUXC_SD2_DATA0__USDHC2_DATA0 0x1d6 MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6 MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6 MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6 - MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1 + MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0 >; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 89e5ee6b78cd..0ac3ce506c9a 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -13,7 +13,7 @@ / { clocks { sleep_clk: sleep_clk { compatible = "fixed-clock"; - clock-frequency = <32000>; + clock-frequency = <32768>; #clock-cells = <0>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts index cb82864a90ef..42f2b235011f 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts @@ -64,7 +64,7 @@ pm8994-regulators { vdd_l17_29-supply = <&vreg_vph_pwr>; vdd_l20_21-supply = <&vreg_vph_pwr>; vdd_l25-supply = <&pm8994_s5>; - vdd_lvs1_2 = <&pm8994_s4>; + vdd_lvs1_2-supply = <&pm8994_s4>; pm8994_s1: s1 { regulator-min-microvolt = <800000>; diff --git a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts index 4f64ca3ea1ef..6ed2a9c01e8c 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts @@ -151,7 +151,7 @@ pm8994-regulators { vdd_l17_29-supply = <&vreg_vph_pwr>; vdd_l20_21-supply = <&vreg_vph_pwr>; vdd_l25-supply = <&pm8994_s5>; - vdd_lvs1_2 = <&pm8994_s4>; + vdd_lvs1_2-supply = <&pm8994_s4>; pm8994_s1: s1 { /* unused */ diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 45f9a44326a6..aeb5762566e9 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -92,7 +92,7 @@ CPU5: cpu@101 { CPU6: cpu@102 { device_type = "cpu"; compatible = "arm,cortex-a57"; - reg = <0x0 0x101>; + reg = <0x0 0x102>; enable-method = "psci"; next-level-cache = <&L2_1>; }; @@ -100,7 +100,7 @@ CPU6: cpu@102 { CPU7: cpu@103 { device_type = "cpu"; compatible = "arm,cortex-a57"; - reg = <0x0 0x101>; + reg = <0x0 0x103>; enable-method = "psci"; next-level-cache = <&L2_1>; }; @@ -316,7 +316,7 @@ blsp1_dma: dma@f9904000 { #dma-cells = <1>; qcom,ee = <0>; qcom,controlled-remotely; - num-channels = <18>; + num-channels = <24>; qcom,num-ees = <4>; }; @@ -412,7 +412,7 @@ blsp2_dma: dma@f9944000 { #dma-cells = <1>; qcom,ee = <0>; qcom,controlled-remotely; - num-channels = <18>; + num-channels = <24>; qcom,num-ees = <4>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 52ba4d07e771..c5f3d4f8f4d2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1471,6 +1471,7 @@ emmc_phy: phy@f780 { reg = <0xf780 0x24>; clocks = <&sdhci>; clock-names = "emmcclk"; + drive-impedance-ohm = <50>; #phy-cells = <0>; status = "disabled"; }; @@ -1481,7 +1482,6 @@ pcie_phy: pcie-phy { clock-names = "refclk"; #phy-cells = <1>; resets = <&cru SRST_PCIEPHY>; - drive-impedance-ohm = <50>; reset-names = "phy"; status = "disabled"; }; diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index 01e22fe40823..9f4599014854 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -52,7 +52,7 @@ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, { if (unlikely(!dctx->sset)) { if (!dctx->rset) { - poly1305_init_arch(dctx, src); + poly1305_init_arm64(&dctx->h, src); src += POLY1305_BLOCK_SIZE; len -= POLY1305_BLOCK_SIZE; dctx->rset = 1; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e458bf3e9e8d..57ece7965bd6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -210,6 +210,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1286807 { ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), + /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */ + ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, #endif {}, diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 86a5cf9bc19a..3724bab278b2 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,6 +76,66 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) return NULL; } +/* + * Find the address the callsite must branch to in order to reach '*addr'. + * + * Due to the limited range of 'BL' instructions, modules may be placed too far + * away to branch directly and must use a PLT. + * + * Returns true when '*addr' contains a reachable target address, or has been + * modified to contain a PLT address. Returns false otherwise. + */ +static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, + struct module *mod, + unsigned long *addr) +{ + unsigned long pc = rec->ip; + long offset = (long)*addr - (long)pc; + struct plt_entry *plt; + + /* + * When the target is within range of the 'BL' instruction, use 'addr' + * as-is and branch to that directly. + */ + if (offset >= -SZ_128M && offset < SZ_128M) + return true; + + /* + * When the target is outside of the range of a 'BL' instruction, we + * must use a PLT to reach it. We can only place PLTs for modules, and + * only when module PLT support is built-in. + */ + if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + return false; + + /* + * 'mod' is only set at module load time, but if we end up + * dealing with an out-of-range condition, we can assume it + * is due to a module being loaded far away from the kernel. + * + * NOTE: __module_text_address() must be called with preemption + * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * retains its validity throughout the remainder of this code. + */ + if (!mod) { + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + } + + if (WARN_ON(!mod)) + return false; + + plt = get_ftrace_plt(mod, *addr); + if (!plt) { + pr_err("ftrace: no module PLT for %ps\n", (void *)*addr); + return false; + } + + *addr = (unsigned long)plt; + return true; +} + /* * Turn on the call to ftrace_caller() in instrumented function */ @@ -83,40 +143,9 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; u32 old, new; - long offset = (long)pc - (long)addr; - if (offset < -SZ_128M || offset >= SZ_128M) { - struct module *mod; - struct plt_entry *plt; - - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - return -EINVAL; - - /* - * On kernels that support module PLTs, the offset between the - * branch instruction and its target may legally exceed the - * range of an ordinary relative 'bl' opcode. In this case, we - * need to branch via a trampoline in the module. - * - * NOTE: __module_text_address() must be called with preemption - * disabled, but we can rely on ftrace_lock to ensure that 'mod' - * retains its validity throughout the remainder of this code. - */ - preempt_disable(); - mod = __module_text_address(pc); - preempt_enable(); - - if (WARN_ON(!mod)) - return -EINVAL; - - plt = get_ftrace_plt(mod, addr); - if (!plt) { - pr_err("ftrace: no module PLT for %ps\n", (void *)addr); - return -EINVAL; - } - - addr = (unsigned long)plt; - } + if (!ftrace_find_callable_addr(rec, NULL, &addr)) + return -EINVAL; old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -131,6 +160,11 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long pc = rec->ip; u32 old, new; + if (!ftrace_find_callable_addr(rec, NULL, &old_addr)) + return -EINVAL; + if (!ftrace_find_callable_addr(rec, NULL, &addr)) + return -EINVAL; + old = aarch64_insn_gen_branch_imm(pc, old_addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -180,54 +214,15 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - bool validate = true; u32 old = 0, new; - long offset = (long)pc - (long)addr; - if (offset < -SZ_128M || offset >= SZ_128M) { - u32 replaced; - - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - return -EINVAL; - - /* - * 'mod' is only set at module load time, but if we end up - * dealing with an out-of-range condition, we can assume it - * is due to a module being loaded far away from the kernel. - */ - if (!mod) { - preempt_disable(); - mod = __module_text_address(pc); - preempt_enable(); - - if (WARN_ON(!mod)) - return -EINVAL; - } - - /* - * The instruction we are about to patch may be a branch and - * link instruction that was redirected via a PLT entry. In - * this case, the normal validation will fail, but we can at - * least check that we are dealing with a branch and link - * instruction that points into the right module. - */ - if (aarch64_insn_read((void *)pc, &replaced)) - return -EFAULT; - - if (!aarch64_insn_is_bl(replaced) || - !within_module(pc + aarch64_get_branch_offset(replaced), - mod)) - return -EINVAL; - - validate = false; - } else { - old = aarch64_insn_gen_branch_imm(pc, addr, - AARCH64_INSN_BRANCH_LINK); - } + if (!ftrace_find_callable_addr(rec, mod, &addr)) + return -EINVAL; + old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_nop(); - return ftrace_modify_code(pc, old, new, validate); + return ftrace_modify_code(pc, old, new, true); } void arch_ftrace_update_code(int command) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index acd43c57d9a0..c6a37d93539b 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -67,6 +67,9 @@ void mte_sync_tags(pte_t *ptep, pte_t pte) if (!test_and_set_bit(PG_mte_tagged, &page->flags)) mte_sync_page_tags(page, ptep, check_swap); } + + /* ensure the tags are visible before the PTE is set */ + smp_wmb(); } int memcmp_pages(struct page *page1, struct page *page2) diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index c07d7a034941..69ec670bcc70 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -30,7 +30,7 @@ struct paravirt_patch_template pv_ops; EXPORT_SYMBOL_GPL(pv_ops); struct pv_time_stolen_time_region { - struct pvclock_vcpu_stolen_time *kaddr; + struct pvclock_vcpu_stolen_time __rcu *kaddr; }; static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region); @@ -47,7 +47,9 @@ early_param("no-steal-acc", parse_no_stealacc); /* return stolen time in ns by asking the hypervisor */ static u64 pv_steal_clock(int cpu) { + struct pvclock_vcpu_stolen_time *kaddr = NULL; struct pv_time_stolen_time_region *reg; + u64 ret = 0; reg = per_cpu_ptr(&stolen_time_region, cpu); @@ -56,28 +58,37 @@ static u64 pv_steal_clock(int cpu) * online notification callback runs. Until the callback * has run we just return zero. */ - if (!reg->kaddr) + rcu_read_lock(); + kaddr = rcu_dereference(reg->kaddr); + if (!kaddr) { + rcu_read_unlock(); return 0; + } - return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time)); + ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time)); + rcu_read_unlock(); + return ret; } static int stolen_time_cpu_down_prepare(unsigned int cpu) { + struct pvclock_vcpu_stolen_time *kaddr = NULL; struct pv_time_stolen_time_region *reg; reg = this_cpu_ptr(&stolen_time_region); if (!reg->kaddr) return 0; - memunmap(reg->kaddr); - memset(reg, 0, sizeof(*reg)); + kaddr = rcu_replace_pointer(reg->kaddr, NULL, true); + synchronize_rcu(); + memunmap(kaddr); return 0; } static int stolen_time_cpu_online(unsigned int cpu) { + struct pvclock_vcpu_stolen_time *kaddr = NULL; struct pv_time_stolen_time_region *reg; struct arm_smccc_res res; @@ -88,17 +99,19 @@ static int stolen_time_cpu_online(unsigned int cpu) if (res.a0 == SMCCC_RET_NOT_SUPPORTED) return -EINVAL; - reg->kaddr = memremap(res.a0, + kaddr = memremap(res.a0, sizeof(struct pvclock_vcpu_stolen_time), MEMREMAP_WB); + rcu_assign_pointer(reg->kaddr, kaddr); + if (!reg->kaddr) { pr_warn("Failed to map stolen time data structure\n"); return -ENOMEM; } - if (le32_to_cpu(reg->kaddr->revision) != 0 || - le32_to_cpu(reg->kaddr->attributes) != 0) { + if (le32_to_cpu(kaddr->revision) != 0 || + le32_to_cpu(kaddr->attributes) != 0) { pr_warn_once("Unexpected revision or attributes in stolen time data\n"); return -ENXIO; } diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 265fe3eb1069..2df3cd477176 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -114,6 +114,6 @@ long compat_arm_syscall(struct pt_regs *regs, int scno) addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4); arm64_notify_die("Oops - bad compat syscall(2)", regs, - SIGILL, ILL_ILLTRP, addr, scno); + SIGILL, ILL_ILLTRP, addr, 0); return 0; } diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index a016f07adc28..b3cc51795650 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -418,11 +418,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, vgic_mmio_read_pending, vgic_mmio_write_spending, - NULL, vgic_uaccess_write_spending, 1, + vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, vgic_mmio_read_pending, vgic_mmio_write_cpending, - NULL, vgic_uaccess_write_cpending, 1, + vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, vgic_mmio_read_active, vgic_mmio_write_sactive, diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 9e1459534ce5..5b441777937b 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -226,8 +226,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, return 0; } -unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) +static unsigned long __read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + bool is_user) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 value = 0; @@ -248,7 +249,7 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, IRQCHIP_STATE_PENDING, &val); WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); - } else if (vgic_irq_is_mapped_level(irq)) { + } else if (!is_user && vgic_irq_is_mapped_level(irq)) { val = vgic_get_phys_line_level(irq); } else { val = irq_is_pending(irq); @@ -263,6 +264,18 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, return value; } +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, false); +} + +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, true); +} + static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) { return (vgic_irq_is_sgi(irq->intid) && diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h index fefcca2b14dc..dcea44015985 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.h +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -149,6 +149,9 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 2d881f34dd9d..7b8158ae36ec 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -228,8 +228,6 @@ SYM_FUNC_END_PI(__dma_flush_area) * - dir - DMA direction */ SYM_FUNC_START_PI(__dma_map_area) - cmp w2, #DMA_FROM_DEVICE - b.eq __dma_inv_area b __dma_clean_area SYM_FUNC_END_PI(__dma_map_area) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index b5447e53cd73..0dea80bf6de4 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -16,8 +16,8 @@ void copy_highpage(struct page *to, struct page *from) { - struct page *kto = page_address(to); - struct page *kfrom = page_address(from); + void *kto = page_address(to); + void *kfrom = page_address(from); copy_page(kto, kfrom); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b08c0b1eb43a..9097e90d07e2 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1112,6 +1112,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_jit_binary_free(header); prog->bpf_func = NULL; prog->jited = 0; + prog->jited_len = 0; goto out_off; } bpf_jit_binary_lock_ro(header); diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c index 589f090f48b9..556b9ba61ec0 100644 --- a/arch/csky/kernel/probes/kprobes.c +++ b/arch/csky/kernel/probes/kprobes.c @@ -28,7 +28,7 @@ static int __kprobes patch_text_cb(void *priv) struct csky_insn_patch *param = priv; unsigned int addr = (unsigned int)param->addr; - if (atomic_inc_return(¶m->cpu_count) == 1) { + if (atomic_inc_return(¶m->cpu_count) == num_online_cpus()) { *(u16 *) addr = cpu_to_le16(param->opcode); dcache_wb_range(addr, addr + 2); atomic_inc(¶m->cpu_count); diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h index 869a3ac6bf23..7ccc077a60be 100644 --- a/arch/ia64/include/asm/timex.h +++ b/arch/ia64/include/asm/timex.h @@ -39,6 +39,7 @@ get_cycles (void) ret = ia64_getreg(_IA64_REG_AR_ITC); return ret; } +#define get_cycles get_cycles extern void ia64_cpu_local_tick (void); extern unsigned long long ia64_native_sched_clock (void); diff --git a/arch/m68k/Kconfig.bus b/arch/m68k/Kconfig.bus index d1e93a39cd3b..f1be832e2b74 100644 --- a/arch/m68k/Kconfig.bus +++ b/arch/m68k/Kconfig.bus @@ -63,7 +63,7 @@ source "drivers/zorro/Kconfig" endif -if COLDFIRE +if !MMU config ISA_DMA_API def_bool !M5272 diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index c17205da47fe..936cd9619bf0 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -312,7 +312,7 @@ comment "Processor Specific Options" config M68KFPU_EMU bool "Math emulation support" - depends on MMU + depends on M68KCLASSIC && FPU help At some point in the future, this will cause floating-point math instructions to be emulated by the kernel on machines that lack a diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index 51a878803fb6..16730561d166 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -321,6 +321,7 @@ comment "Machine Options" config UBOOT bool "Support for U-Boot command line parameters" + depends on COLDFIRE help If you say Y here kernel will try to collect command line parameters from the initial u-boot stack. diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h index 87151d67d91e..bce5ca56c388 100644 --- a/arch/m68k/include/asm/pgtable_no.h +++ b/arch/m68k/include/asm/pgtable_no.h @@ -42,7 +42,8 @@ extern void paging_init(void); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) +extern void *empty_zero_page; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) /* * All 32bit addresses are effectively valid for vmalloc... diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h index 80eb2396d01e..3ba40bc1dfaa 100644 --- a/arch/m68k/include/asm/raw_io.h +++ b/arch/m68k/include/asm/raw_io.h @@ -80,14 +80,14 @@ ({ u16 __v = le16_to_cpu(*(__force volatile u16 *) (addr)); __v; }) #define rom_out_8(addr, b) \ - ({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \ + (void)({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \ __w = ((*(__force volatile u8 *) ((_addr | 0x10000) + (__v<<1)))); }) #define rom_out_be16(addr, w) \ - ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ + (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v & 0xFF)<<1)))); \ __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v >> 8)<<1)))); }) #define rom_out_le16(addr, w) \ - ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ + (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v >> 8)<<1)))); \ __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v & 0xFF)<<1)))); }) diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h index 6a21d9358280..f4a7a340f4ca 100644 --- a/arch/m68k/include/asm/timex.h +++ b/arch/m68k/include/asm/timex.h @@ -35,7 +35,7 @@ static inline unsigned long random_get_entropy(void) { if (mach_random_get_entropy) return mach_random_get_entropy(); - return 0; + return random_get_entropy_fallback(); } #define random_get_entropy random_get_entropy diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index 58f829c9b6c7..79d6fd249583 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -26,7 +26,6 @@ #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 #define cpu_has_tx39_cache 0 -#define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h index 49a93e82c252..2635b6ba1cb5 100644 --- a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h @@ -29,7 +29,6 @@ #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 #define cpu_has_tx39_cache 0 -#define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index 8026baf46e72..2e107886f97a 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -76,25 +76,24 @@ static inline cycles_t get_cycles(void) else return 0; /* no usable counter */ } +#define get_cycles get_cycles /* * Like get_cycles - but where c0_count is not available we desperately * use c0_random in an attempt to get at least a little bit of entropy. - * - * R6000 and R6000A neither have a count register nor a random register. - * That leaves no entropy source in the CPU itself. */ static inline unsigned long random_get_entropy(void) { - unsigned int prid = read_c0_prid(); - unsigned int imp = prid & PRID_IMP_MASK; + unsigned int c0_random; - if (can_use_mips_counter(prid)) + if (can_use_mips_counter(read_c0_prid())) return read_c0_count(); - else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A)) - return read_c0_random(); + + if (cpu_has_3kex) + c0_random = (read_c0_random() >> 8) & 0x3f; else - return 0; /* no usable register */ + c0_random = read_c0_random() & 0x3f; + return (random_get_entropy_fallback() << 6) | (0x3f - c0_random); } #define random_get_entropy random_get_entropy diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 8d2535123f11..d005be84c482 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -27,6 +27,7 @@ phys_addr_t __weak mips_cpc_default_phys_base(void) cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc"); if (cpc_node) { err = of_address_to_resource(cpc_node, 0, &res); + of_node_put(cpc_node); if (!err) return res.start; } diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 42222f849bd2..446a2536999b 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -167,6 +167,8 @@ static inline void clkdev_add_sys(const char *dev, unsigned int module, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = NULL; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c index 3d5683e75cf1..200fe9ff641d 100644 --- a/arch/mips/lantiq/xway/gptu.c +++ b/arch/mips/lantiq/xway/gptu.c @@ -122,6 +122,8 @@ static inline void clkdev_add_gptu(struct device *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev_name(dev); clk->cl.con_id = con; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 917fac1636b7..084f6caba5f2 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -315,6 +315,8 @@ static void clkdev_add_pmu(const char *dev, const char *con, bool deactivate, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -338,6 +340,8 @@ static void clkdev_add_cgu(const char *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -356,24 +360,28 @@ static void clkdev_add_pci(void) struct clk *clk_ext = kzalloc(sizeof(struct clk), GFP_KERNEL); /* main pci clock */ - clk->cl.dev_id = "17000000.pci"; - clk->cl.con_id = NULL; - clk->cl.clk = clk; - clk->rate = CLOCK_33M; - clk->rates = valid_pci_rates; - clk->enable = pci_enable; - clk->disable = pmu_disable; - clk->module = 0; - clk->bits = PMU_PCI; - clkdev_add(&clk->cl); + if (clk) { + clk->cl.dev_id = "17000000.pci"; + clk->cl.con_id = NULL; + clk->cl.clk = clk; + clk->rate = CLOCK_33M; + clk->rates = valid_pci_rates; + clk->enable = pci_enable; + clk->disable = pmu_disable; + clk->module = 0; + clk->bits = PMU_PCI; + clkdev_add(&clk->cl); + } /* use internal/external bus clock */ - clk_ext->cl.dev_id = "17000000.pci"; - clk_ext->cl.con_id = "external"; - clk_ext->cl.clk = clk_ext; - clk_ext->enable = pci_ext_enable; - clk_ext->disable = pci_ext_disable; - clkdev_add(&clk_ext->cl); + if (clk_ext) { + clk_ext->cl.dev_id = "17000000.pci"; + clk_ext->cl.con_id = "external"; + clk_ext->cl.clk = clk_ext; + clk_ext->enable = pci_ext_enable; + clk_ext->disable = pci_ext_disable; + clkdev_add(&clk_ext->cl); + } } /* xway socs can generate clocks on gpio pins */ @@ -393,9 +401,15 @@ static void clkdev_add_clkout(void) char *name; name = kzalloc(sizeof("clkout0"), GFP_KERNEL); + if (!name) + continue; sprintf(name, "clkout%d", i); clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) { + kfree(name); + continue; + } clk->cl.dev_id = "1f103000.cgu"; clk->cl.con_id = name; clk->cl.clk = clk; diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 7b7f25b4b057..9240bcdbe74e 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq) printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2); - atomic_inc(&irq_err_count); - return -1; } diff --git a/arch/nios2/include/asm/timex.h b/arch/nios2/include/asm/timex.h index a769f871b28d..40a1adc9bd03 100644 --- a/arch/nios2/include/asm/timex.h +++ b/arch/nios2/include/asm/timex.h @@ -8,5 +8,8 @@ typedef unsigned long cycles_t; extern cycles_t get_cycles(void); +#define get_cycles get_cycles + +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif diff --git a/arch/openrisc/include/asm/timex.h b/arch/openrisc/include/asm/timex.h index d52b4e536e3f..5487fa93dd9b 100644 --- a/arch/openrisc/include/asm/timex.h +++ b/arch/openrisc/include/asm/timex.h @@ -23,6 +23,7 @@ static inline cycles_t get_cycles(void) { return mfspr(SPR_TTCR); } +#define get_cycles get_cycles /* This isn't really used any more */ #define CLOCK_TICK_RATE 1000 diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index af355e3f4619..459b0a1e4eb2 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -521,6 +521,15 @@ _start: l.ori r3,r0,0x1 l.mtspr r0,r3,SPR_SR + /* + * Start the TTCR as early as possible, so that the RNG can make use of + * measurements of boot time from the earliest opportunity. Especially + * important is that the TTCR does not return zero by the time we reach + * rand_initialize(). + */ + l.movhi r3,hi(SPR_TTMR_CR) + l.mtspr r0,r3,SPR_TTMR + CLEAR_GPR(r1) CLEAR_GPR(r2) CLEAR_GPR(r3) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index ecfd7af35094..0cf7ce144f43 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -11,6 +11,7 @@ config PARISC select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_MEMORY_FAILURE diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h index c4cd6360f996..55d29c4f716e 100644 --- a/arch/parisc/include/asm/fb.h +++ b/arch/parisc/include/asm/fb.h @@ -12,9 +12,13 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; } +#if defined(CONFIG_FB_STI) +int fb_is_primary_device(struct fb_info *info); +#else static inline int fb_is_primary_device(struct fb_info *info) { return 0; } +#endif #endif /* _ASM_FB_H_ */ diff --git a/arch/parisc/include/asm/timex.h b/arch/parisc/include/asm/timex.h index 06b510f8172e..b4622cb06a75 100644 --- a/arch/parisc/include/asm/timex.h +++ b/arch/parisc/include/asm/timex.h @@ -13,9 +13,10 @@ typedef unsigned long cycles_t; -static inline cycles_t get_cycles (void) +static inline cycles_t get_cycles(void) { return mfctl(16); } +#define get_cycles get_cycles #endif diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5afa0ebd78ca..78dd6be8b31d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -786,7 +786,6 @@ config THREAD_SHIFT range 13 15 default "15" if PPC_256K_PAGES default "14" if PPC64 - default "14" if KASAN default "13" help Used to define the stack size. The default is almost always what you diff --git a/arch/powerpc/include/asm/bpf_perf_event.h b/arch/powerpc/include/asm/bpf_perf_event.h new file mode 100644 index 000000000000..e8a7b4ffb58c --- /dev/null +++ b/arch/powerpc/include/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BPF_PERF_EVENT_H +#define _ASM_POWERPC_BPF_PERF_EVENT_H + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _ASM_POWERPC_BPF_PERF_EVENT_H */ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index bc76970b6ee5..e647dfcb3191 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -96,7 +96,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name #endif /* PPC64_ELF_ABI_v1 */ #endif /* CONFIG_FTRACE_SYSCALLS */ -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER) #include static inline void this_cpu_disable_ftrace(void) @@ -120,11 +120,13 @@ static inline u8 this_cpu_get_ftrace_enabled(void) return get_paca()->ftrace_enabled; } +void ftrace_free_init_tramp(void); #else /* CONFIG_PPC64 */ static inline void this_cpu_disable_ftrace(void) { } static inline void this_cpu_enable_ftrace(void) { } static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { } static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } +static inline void ftrace_free_init_tramp(void) { } #endif /* CONFIG_PPC64 */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index f2c5c26869f1..03ae544eb6cc 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -216,6 +216,9 @@ static inline bool pfn_valid(unsigned long pfn) #define __pa(x) ((phys_addr_t)(unsigned long)(x) - VIRT_PHYS_OFFSET) #else #ifdef CONFIG_PPC64 + +#define VIRTUAL_WARN_ON(x) WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && (x)) + /* * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit. @@ -223,13 +226,13 @@ static inline bool pfn_valid(unsigned long pfn) */ #define __va(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) >= PAGE_OFFSET); \ (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \ }) #define __pa(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) < PAGE_OFFSET); \ (unsigned long)(x) & 0x0fffffffffffffffUL; \ }) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index f0c0816f5727..d6a3cd147059 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -212,6 +212,7 @@ #define PPC_INST_COPY 0x7c20060c #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe +#define PPC_INST_DSSALL 0x7e00066c #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LSWI 0x7c0004aa @@ -517,6 +518,7 @@ #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b)) #define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b)) #define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b)) +#define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL) #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh)) #define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LDARX(t, a, b, eh)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LWARX(t, a, b, eh)) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 46a210b03d2b..6de3517bea94 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -14,10 +14,16 @@ #ifdef __KERNEL__ -#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT +#ifdef CONFIG_KASAN +#define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1) +#else +#define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT +#endif + +#if defined(CONFIG_VMAP_STACK) && MIN_THREAD_SHIFT < PAGE_SHIFT #define THREAD_SHIFT PAGE_SHIFT #else -#define THREAD_SHIFT CONFIG_THREAD_SHIFT +#define THREAD_SHIFT MIN_THREAD_SHIFT #endif #define THREAD_SIZE (1 << THREAD_SHIFT) diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 95988870a57b..171602fd358e 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -19,6 +19,7 @@ static inline cycles_t get_cycles(void) { return mftb(); } +#define get_cycles get_cycles #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_TIMEX_H */ diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index e33f80b0ea81..47062b457049 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -52,7 +52,7 @@ enum vas_cop_type { * Receive window attributes specified by the (in-kernel) owner of window. */ struct vas_rx_win_attr { - void *rx_fifo; + u64 rx_fifo; int rx_fifo_size; int wcreds_max; diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h deleted file mode 100644 index 5e1e648aeec4..000000000000 --- a/arch/powerpc/include/uapi/asm/bpf_perf_event.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ -#define _UAPI__ASM_BPF_PERF_EVENT_H__ - -#include - -typedef struct user_pt_regs bpf_user_pt_regs_t; - -#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index c3bb800dc435..1a5ba26aab15 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -861,7 +861,6 @@ static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info) sizeof(struct fadump_memory_range)); return 0; } - static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, u64 base, u64 end) { @@ -880,7 +879,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, start = mem_ranges[mrange_info->mem_range_cnt - 1].base; size = mem_ranges[mrange_info->mem_range_cnt - 1].size; - if ((start + size) == base) + /* + * Boot memory area needs separate PT_LOAD segment(s) as it + * is moved to a different location at the time of crash. + * So, fold only if the region is not boot memory area. + */ + if ((start + size) == base && start >= fw_dump.boot_mem_top) is_adjacent = true; } if (!is_adjacent) { diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 1f835539fda4..77cd4c5a2d63 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -37,7 +37,7 @@ static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; cpuidle_disable = IDLE_POWERSAVE_OFF; - return 0; + return 1; } __setup("powersave=off", powersave_off); @@ -82,7 +82,7 @@ void power4_idle(void) return; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile("DSSALL ; sync" ::: "memory"); + asm volatile(PPC_DSSALL " ; sync" ::: "memory"); power4_idle_nap(); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 69df840f7253..315e5e2ad703 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -129,7 +129,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) mtspr SPRN_HID0,r4 BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */ diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 225511d73bef..f2e03ed423d0 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -292,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) isync /* Stop DST streams */ - DSSALL + PPC_DSSALL sync /* Get the current enable bit of the L3CR into r4 */ @@ -401,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) _GLOBAL(__flush_disable_L1) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3064694afea1..c43cc26bde5d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1800,7 +1800,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) tm_reclaim_current(0); #endif - memset(regs->gpr, 0, sizeof(regs->gpr)); + memset(®s->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0])); regs->ctr = 0; regs->link = 0; regs->xer = 0; @@ -2108,12 +2108,12 @@ static unsigned long __get_wchan(struct task_struct *p) return 0; do { - sp = *(unsigned long *)sp; + sp = READ_ONCE_NOCHECK(*(unsigned long *)sp); if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) || p->state == TASK_RUNNING) return 0; if (count > 0) { - ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE]; + ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]); if (!in_sched_functions(ip)) return ip; } diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index b183ab9c5107..dfa5f729f774 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -13,7 +13,7 @@ # If you really need to reference something from prom_init.o add # it to the list below: -grep "^CONFIG_KASAN=y$" .config >/dev/null +grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null if [ $? -eq 0 ] then MEM_FUNCS="__memcpy __memset" diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index f6e51be47c6e..9ea9ee513ae1 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -75,8 +75,13 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - sizeof(long)); + if (IS_ENABLED(CONFIG_PPC32)) { + // On 32-bit the index we are passed refers to 32-bit words + tmp = ((u32 *)child->thread.fp_state.fpr)[fpidx]; + } else { + memcpy(&tmp, &child->thread.TS_FPR(fpidx), + sizeof(long)); + } else tmp = child->thread.fp_state.fpscr; } @@ -108,8 +113,13 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&child->thread.TS_FPR(fpidx), &data, - sizeof(long)); + if (IS_ENABLED(CONFIG_PPC32)) { + // On 32-bit the index we are passed refers to 32-bit words + ((u32 *)child->thread.fp_state.fpr)[fpidx] = data; + } else { + memcpy(&child->thread.TS_FPR(fpidx), &data, + sizeof(long)); + } else child->thread.fp_state.fpscr = data; ret = 0; @@ -478,4 +488,7 @@ void __init pt_regs_check(void) * real registers. */ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); + + // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX)); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index cf421eb7f90d..bf962051af0a 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1040,7 +1040,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "get-time-of-day", -1, -1, -1, -1, -1 }, { "ibm,get-vpd", -1, 0, -1, 1, 2 }, { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, - { "ibm,platform-dump", -1, 4, 5, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, @@ -1087,6 +1087,15 @@ static bool block_rtas_call(int token, int nargs, size = 1; end = base + size - 1; + + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (!strcmp(f->name, "ibm,platform-dump") && + base == 0) + return false; + if (!in_rmo_buf(base, end)) goto err; } diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index f73f4d72fea4..e0cbd63007f2 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume) #ifdef CONFIG_ALTIVEC /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif sync diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 6d3189830dd3..068a268a8013 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -142,7 +142,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) _GLOBAL(swsusp_arch_resume) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 42761ebec9f7..d24aea4fed7a 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -336,9 +336,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) /* Is this a known long jump tramp? */ for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) - break; - else if (ftrace_tramps[i] == tramp) + if (ftrace_tramps[i] == tramp) return 0; /* Is this a known plt tramp? */ @@ -882,6 +880,17 @@ void arch_ftrace_update_code(int command) extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; +void ftrace_free_init_tramp(void) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++) + if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) { + ftrace_tramps[i] = 0; + return; + } +} + int __init ftrace_dyn_arch_init(void) { int i; diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 84e5a2dc8be5..3dd58b4ee33e 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -359,13 +359,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, struct kvm *kvm, unsigned long *gfn) { - struct kvmppc_uvmem_slot *p; + struct kvmppc_uvmem_slot *p = NULL, *iter; bool ret = false; unsigned long i; - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) - if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) + list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list) + if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) { + p = iter; break; + } if (!p) return ret; /* diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 22eb1c718e62..1ed276d2305f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -51,6 +51,7 @@ #include #include #include +#include #include @@ -347,6 +348,7 @@ void free_initmem(void) mark_initmem_nx(); init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); + ftrace_free_init_tramp(); } /** diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 18f20da0d348..64290d343b55 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -79,7 +79,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * context */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile ("dssall"); + asm volatile (PPC_DSSALL); if (new_on_cpu) radix_kvm_prefetch_workaround(next); diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index 77884e24281d..3d845e001c87 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -95,8 +95,8 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) pgdp = pgd_offset_k(ea); p4dp = p4d_offset(pgdp, ea); if (p4d_none(*p4dp)) { - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); - p4d_populate(&init_mm, p4dp, pmdp); + pudp = early_alloc_pgtable(PUD_TABLE_SIZE); + p4d_populate(&init_mm, p4dp, pudp); } pudp = pud_offset(p4dp, ea); if (pud_none(*pudp)) { @@ -105,7 +105,7 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) } pmdp = pmd_offset(pudp, ea); if (!pmd_present(*pmdp)) { - ptep = early_alloc_pgtable(PAGE_SIZE); + ptep = early_alloc_pgtable(PTE_TABLE_SIZE); pmd_populate_kernel(&init_mm, pmdp, ptep); } ptep = pte_offset_kernel(pmdp, ea); diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 4c74e8a5482b..c555ad9fa00b 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -18,7 +18,6 @@ #include #include #include -#include #include struct regions { @@ -36,10 +35,6 @@ struct regions { int reserved_mem_size_cells; }; -/* Simplified build-specific string for starting entropy. */ -static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" - LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; - struct regions __initdata regions; static __init void kaslr_get_cmdline(void *fdt) @@ -72,7 +67,8 @@ static unsigned long __init get_boot_seed(void *fdt) { unsigned long hash = 0; - hash = rotate_xor(hash, build_str, sizeof(build_str)); + /* build-specific string for starting entropy. */ + hash = rotate_xor(hash, linux_banner, strlen(linux_banner)); hash = rotate_xor(hash, fdt, fdt_totalsize(fdt)); return hash; diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 58448f0e4721..52990becbdfc 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -363,7 +363,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { mask |= CNST_THRESH_MASK; value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); - } + } else if (event_is_threshold(event)) + return -1; } else { /* * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index ae8b812c9202..2481e78c0423 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -327,6 +327,6 @@ late_initcall(cpm_init); static int __init cpm_powersave_off(char *arg) { cpm.powersave_off = 1; - return 0; + return 1; } __setup("powersave=off", cpm_powersave_off); diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index c58b6f1c40e3..3ef5e9fd3a9b 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -280,6 +280,7 @@ cpm_setbrg(uint brg, uint rate) out_be32(bp, (((BRG_UART_CLK_DIV16 / rate) - 1) << 1) | CPM_BRG_EN | CPM_BRG_DIV16); } +EXPORT_SYMBOL(cpm_setbrg); struct cpm_ioport16 { __be16 dir, par, odr_sor, dat, intr; diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index ced225415486..b8ae56e9f414 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -48,7 +48,7 @@ flush_disable_75x: /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -197,7 +197,7 @@ flush_disable_745x: isync /* Stop prefetch streams */ - DSSALL + PPC_DSSALL sync /* Disable L2 prefetching */ diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index 9a360ced663b..e23a51a05f99 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -60,7 +60,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) addr = be64_to_cpu(addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = (void *)addr; - if (opal_fdm_active->registered_regions == 0) + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) return; ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); @@ -95,17 +95,17 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf); static void opal_fadump_update_config(struct fw_dump *fadump_conf, const struct opal_fadump_mem_struct *fdm) { - pr_debug("Boot memory regions count: %d\n", fdm->region_cnt); + pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt)); /* * The destination address of the first boot memory region is the * destination address of boot memory regions. */ - fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest; + fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest); pr_debug("Destination address of boot memory regions: %#016llx\n", fadump_conf->boot_mem_dest_addr); - fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr; + fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr); } /* @@ -126,9 +126,9 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, fadump_conf->boot_memory_size = 0; pr_debug("Boot memory regions:\n"); - for (i = 0; i < fdm->region_cnt; i++) { - base = fdm->rgn[i].src; - size = fdm->rgn[i].size; + for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) { + base = be64_to_cpu(fdm->rgn[i].src); + size = be64_to_cpu(fdm->rgn[i].size); pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); fadump_conf->boot_mem_addr[i] = base; @@ -143,7 +143,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Start address of reserve dump area (permanent reservation) for * re-registering FADump after dump capture. */ - fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest; + fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest); /* * Rarely, but it can so happen that system crashes before all @@ -155,13 +155,14 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Hope the memory that could not be preserved only has pages * that are usually filtered out while saving the vmcore. */ - if (fdm->region_cnt > fdm->registered_regions) { + if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) { pr_warn("Not all memory regions were saved!!!\n"); pr_warn(" Unsaved memory regions:\n"); - i = fdm->registered_regions; - while (i < fdm->region_cnt) { + i = be16_to_cpu(fdm->registered_regions); + while (i < be16_to_cpu(fdm->region_cnt)) { pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n", - i, fdm->rgn[i].src, fdm->rgn[i].size); + i, be64_to_cpu(fdm->rgn[i].src), + be64_to_cpu(fdm->rgn[i].size)); i++; } @@ -170,7 +171,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, } fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size); - fadump_conf->boot_mem_regs_cnt = fdm->region_cnt; + fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt); opal_fadump_update_config(fadump_conf, fdm); } @@ -178,35 +179,38 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm) { fdm->version = OPAL_FADUMP_VERSION; - fdm->region_cnt = 0; - fdm->registered_regions = 0; - fdm->fadumphdr_addr = 0; + fdm->region_cnt = cpu_to_be16(0); + fdm->registered_regions = cpu_to_be16(0); + fdm->fadumphdr_addr = cpu_to_be64(0); } static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf) { u64 addr = fadump_conf->reserve_dump_area_start; + u16 reg_cnt; int i; opal_fdm = __va(fadump_conf->kernel_metadata); opal_fadump_init_metadata(opal_fdm); /* Boot memory regions */ + reg_cnt = be16_to_cpu(opal_fdm->region_cnt); for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { - opal_fdm->rgn[i].src = fadump_conf->boot_mem_addr[i]; - opal_fdm->rgn[i].dest = addr; - opal_fdm->rgn[i].size = fadump_conf->boot_mem_sz[i]; + opal_fdm->rgn[i].src = cpu_to_be64(fadump_conf->boot_mem_addr[i]); + opal_fdm->rgn[i].dest = cpu_to_be64(addr); + opal_fdm->rgn[i].size = cpu_to_be64(fadump_conf->boot_mem_sz[i]); - opal_fdm->region_cnt++; + reg_cnt++; addr += fadump_conf->boot_mem_sz[i]; } + opal_fdm->region_cnt = cpu_to_be16(reg_cnt); /* * Kernel metadata is passed to f/w and retrieved in capture kerenl. * So, use it to save fadump header address instead of calculating it. */ - opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest + - fadump_conf->boot_memory_size); + opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) + + fadump_conf->boot_memory_size); opal_fadump_update_config(fadump_conf, opal_fdm); @@ -269,18 +273,21 @@ static u64 opal_fadump_get_bootmem_min(void) static int opal_fadump_register(struct fw_dump *fadump_conf) { s64 rc = OPAL_PARAMETER; + u16 registered_regs; int i, err = -EIO; - for (i = 0; i < opal_fdm->region_cnt; i++) { + registered_regs = be16_to_cpu(opal_fdm->registered_regions); + for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) { rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE, - opal_fdm->rgn[i].src, - opal_fdm->rgn[i].dest, - opal_fdm->rgn[i].size); + be64_to_cpu(opal_fdm->rgn[i].src), + be64_to_cpu(opal_fdm->rgn[i].dest), + be64_to_cpu(opal_fdm->rgn[i].size)); if (rc != OPAL_SUCCESS) break; - opal_fdm->registered_regions++; + registered_regs++; } + opal_fdm->registered_regions = cpu_to_be16(registered_regs); switch (rc) { case OPAL_SUCCESS: @@ -291,7 +298,8 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) case OPAL_RESOURCE: /* If MAX regions limit in f/w is hit, warn and proceed. */ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n", - (opal_fdm->region_cnt - opal_fdm->registered_regions)); + (be16_to_cpu(opal_fdm->region_cnt) - + be16_to_cpu(opal_fdm->registered_regions))); fadump_conf->dump_registered = 1; err = 0; break; @@ -312,7 +320,7 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) * If some regions were registered before OPAL_MPIPL_ADD_RANGE * OPAL call failed, unregister all regions. */ - if ((err < 0) && (opal_fdm->registered_regions > 0)) + if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0)) opal_fadump_unregister(fadump_conf); return err; @@ -328,7 +336,7 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf) return -EIO; } - opal_fdm->registered_regions = 0; + opal_fdm->registered_regions = cpu_to_be16(0); fadump_conf->dump_registered = 0; return 0; } @@ -563,19 +571,20 @@ static void opal_fadump_region_show(struct fw_dump *fadump_conf, else fdm_ptr = opal_fdm; - for (i = 0; i < fdm_ptr->region_cnt; i++) { + for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) { /* * Only regions that are registered for MPIPL * would have dump data. */ if ((fadump_conf->dump_active) && - (i < fdm_ptr->registered_regions)) - dumped_bytes = fdm_ptr->rgn[i].size; + (i < be16_to_cpu(fdm_ptr->registered_regions))) + dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size); seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", - fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest); + be64_to_cpu(fdm_ptr->rgn[i].src), + be64_to_cpu(fdm_ptr->rgn[i].dest)); seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", - fdm_ptr->rgn[i].size, dumped_bytes); + be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes); } /* Dump is active. Show reserved area start address. */ @@ -624,6 +633,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { const __be32 *prop; unsigned long dn; + __be64 be_addr; u64 addr = 0; int i, len; s64 ret; @@ -680,13 +690,13 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) if (!prop) return; - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); - if ((ret != OPAL_SUCCESS) || !addr) { + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr); + if ((ret != OPAL_SUCCESS) || !be_addr) { pr_err("Failed to get Kernel metadata (%lld)\n", ret); return; } - addr = be64_to_cpu(addr); + addr = be64_to_cpu(be_addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = __va(addr); @@ -697,14 +707,14 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) } /* Kernel regions not registered with f/w for MPIPL */ - if (opal_fdm_active->registered_regions == 0) { + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) { opal_fdm_active = NULL; return; } - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr); - if (addr) { - addr = be64_to_cpu(addr); + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr); + if (be_addr) { + addr = be64_to_cpu(be_addr); pr_debug("CPU metadata addr: %llx\n", addr); opal_cpu_metadata = __va(addr); } diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h index f1e9ecf548c5..3f715efb0aa6 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.h +++ b/arch/powerpc/platforms/powernv/opal-fadump.h @@ -31,14 +31,14 @@ * OPAL FADump kernel metadata * * The address of this structure will be registered with f/w for retrieving - * and processing during crash dump. + * in the capture kernel to process the crash dump. */ struct opal_fadump_mem_struct { u8 version; u8 reserved[3]; - u16 region_cnt; /* number of regions */ - u16 registered_regions; /* Regions registered for MPIPL */ - u64 fadumphdr_addr; + __be16 region_cnt; /* number of regions */ + __be16 registered_regions; /* Regions registered for MPIPL */ + __be64 fadumphdr_addr; struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS]; } __packed; @@ -135,7 +135,7 @@ static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt, for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) { reg_entry = (struct hdat_fadump_reg_entry *)bufp; val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) : - reg_entry->reg_val); + (u64)(reg_entry->reg_val)); opal_fadump_set_regval_regnum(regs, be32_to_cpu(reg_entry->reg_type), be32_to_cpu(reg_entry->reg_num), diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 11df4e16a1cc..528946ee7a77 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -42,4 +42,6 @@ ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); u32 memcons_get_size(struct memcons *mc); struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name); +void pnv_rng_init(void); + #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 69c344c8884f..236bd2ba51b9 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -17,6 +17,7 @@ #include #include #include +#include "powernv.h" #define DARN_ERR 0xFFFFFFFFFFFFFFFFul @@ -28,7 +29,6 @@ struct powernv_rng { static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); - int powernv_hwrng_present(void) { struct powernv_rng *rng; @@ -98,9 +98,6 @@ static int initialise_darn(void) return 0; } } - - pr_warn("Unable to use DARN for get_random_seed()\n"); - return -EIO; } @@ -163,32 +160,59 @@ static __init int rng_create(struct device_node *dn) rng_init_per_cpu(rng, dn); - pr_info_once("Registering arch random hook.\n"); - ppc_md.get_random_seed = powernv_get_random_long; return 0; } -static __init int rng_init(void) +static int __init pnv_get_random_long_early(unsigned long *v) { struct device_node *dn; - int rc; - for_each_compatible_node(dn, NULL, "ibm,power-rng") { - rc = rng_create(dn); - if (rc) { - pr_err("Failed creating rng for %pOF (%d).\n", - dn, rc); - continue; - } + if (!slab_is_available()) + return 0; - /* Create devices for hwrng driver */ - of_platform_device_create(dn, NULL, NULL); + if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early, + NULL) != pnv_get_random_long_early) + return 0; + + for_each_compatible_node(dn, NULL, "ibm,power-rng") + rng_create(dn); + + if (!ppc_md.get_random_seed) + return 0; + return ppc_md.get_random_seed(v); +} + +void __init pnv_rng_init(void) +{ + struct device_node *dn; + + /* Prefer darn over the rest. */ + if (!initialise_darn()) + return; + + dn = of_find_compatible_node(NULL, NULL, "ibm,power-rng"); + if (dn) + ppc_md.get_random_seed = pnv_get_random_long_early; + + of_node_put(dn); +} + +static int __init pnv_rng_late_init(void) +{ + struct device_node *dn; + unsigned long v; + + /* In case it wasn't called during init for some other reason. */ + if (ppc_md.get_random_seed == pnv_get_random_long_early) + pnv_get_random_long_early(&v); + + if (ppc_md.get_random_seed == powernv_get_random_long) { + for_each_compatible_node(dn, NULL, "ibm,power-rng") + of_platform_device_create(dn, NULL, NULL); } - initialise_darn(); - return 0; } -machine_subsys_initcall(powernv, rng_init); +machine_subsys_initcall(powernv, pnv_rng_late_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 4426a109ec2f..1a2f12dc0552 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -193,6 +193,8 @@ static void __init pnv_setup_arch(void) pnv_check_guarded_cores(); /* XXX PMCS */ + + pnv_rng_init(); } static void __init pnv_init(void) diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index e4a00ad06f9d..67c8c4b2d8b1 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -55,6 +55,7 @@ static int __init uv_init(void) return -ENODEV; uv_memcons = memcons_init(node, "memcons"); + of_node_put(node); if (!uv_memcons) return -ENOENT; diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 3d21fce254b7..dd9c23c09781 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -352,7 +352,7 @@ int vas_setup_fault_window(struct vas_instance *vinst) vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); attr.rx_fifo_size = vinst->fault_fifo_size; - attr.rx_fifo = vinst->fault_fifo; + attr.rx_fifo = __pa(vinst->fault_fifo); /* * Max creds is based on number of CRBs can fit in the FIFO. diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 7ba0840fc3b5..3a86cdd5ae6c 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -403,7 +403,7 @@ static void init_winctx_regs(struct vas_window *window, * * See also: Design note in function header. */ - val = __pa(winctx->rx_fifo); + val = winctx->rx_fifo; val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0); write_hvwc_reg(window, VREG(LFIFO_BAR), val); @@ -737,7 +737,7 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, */ winctx->fifo_disable = true; winctx->intr_disable = true; - winctx->rx_fifo = NULL; + winctx->rx_fifo = 0; } winctx->lnotify_lpid = rxattr->lnotify_lpid; diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 70f793e8f6cc..1f6e73809205 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -383,7 +383,7 @@ struct vas_window { * is a container for the register fields in the window context. */ struct vas_winctx { - void *rx_fifo; + u64 rx_fifo; int rx_fifo_size; int wcreds_max; int rsvd_txbuf_count; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 593840847cd3..ada9601aaff1 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -114,4 +114,6 @@ int dlpar_workqueue_init(void); void pseries_setup_security_mitigations(void); void pseries_lpar_read_hblkrm_characteristics(void); +void pseries_rng_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index 6268545947b8..6ddfdeaace9e 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -10,6 +10,7 @@ #include #include #include +#include "pseries.h" static int pseries_get_random_long(unsigned long *v) @@ -24,19 +25,13 @@ static int pseries_get_random_long(unsigned long *v) return 0; } -static __init int rng_init(void) +void __init pseries_rng_init(void) { struct device_node *dn; dn = of_find_compatible_node(NULL, NULL, "ibm,random"); if (!dn) - return -ENODEV; - - pr_info("Registering arch random hook.\n"); - + return; ppc_md.get_random_seed = pseries_get_random_long; - of_node_put(dn); - return 0; } -machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 47dfada140e1..0eac9ca782c2 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -824,6 +824,8 @@ static void __init pSeries_setup_arch(void) if (swiotlb_force == SWIOTLB_FORCE) ppc_swiotlb_enable = 1; + + pseries_rng_init(); } static void pseries_panic(char *str) diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 6b4a34b36d98..8ff9bcfe4b8d 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -403,9 +403,10 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) } /* Initialize the DART HW */ - if (dart_init(dn) != 0) + if (dart_init(dn) != 0) { + of_node_put(dn); return; - + } /* * U4 supports a DART bypass, we use it for 64-bit capable devices to * improve performance. However, that only works for devices connected @@ -418,6 +419,7 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) /* Setup pci_dma ops */ set_pci_dma_ops(&dma_iommu_ops); + of_node_put(dn); } #ifdef CONFIG_PM diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 07c164f7f8cf..3f9f78621cf3 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -505,8 +505,10 @@ int fsl_rio_setup(struct platform_device *dev) if (rc) { dev_err(&dev->dev, "Can't get %pOF property 'reg'\n", rmu_node); + of_node_put(rmu_node); goto err_rmu; } + of_node_put(rmu_node); rmu_regs_win = ioremap(rmu_regs.start, resource_size(&rmu_regs)); if (!rmu_regs_win) { dev_err(&dev->dev, "Unable to map rmu register window\n"); diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 68fd2540b093..7fa520efcefa 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -195,6 +195,7 @@ int icp_opal_init(void) printk("XICS: Using OPAL ICP fallbacks\n"); + of_node_put(np); return 0; } diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index db9505c658ea..3d3016092b31 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -73,6 +73,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) endif KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) +KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. While unaligned accesses are explicitly allowed in the diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 7db861053483..64c06c9b41dc 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -166,7 +166,7 @@ uart0: serial@10010000 { clocks = <&prci PRCI_CLK_TLCLK>; status = "disabled"; }; - dma: dma@3000000 { + dma: dma-controller@3000000 { compatible = "sifive,fu540-c000-pdma"; reg = <0x0 0x3000000 0x0 0x8000>; interrupt-parent = <&plic0>; diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h index d6c277992f76..b53891964ae0 100644 --- a/arch/riscv/include/asm/irq_work.h +++ b/arch/riscv/include/asm/irq_work.h @@ -4,7 +4,7 @@ static inline bool arch_irq_work_has_interrupt(void) { - return true; + return IS_ENABLED(CONFIG_SMP); } extern void arch_irq_work_raise(void); #endif /* _ASM_RISCV_IRQ_WORK_H */ diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 81de51e6aa32..a06697846e69 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -41,7 +41,7 @@ static inline u32 get_cycles_hi(void) static inline unsigned long random_get_entropy(void) { if (unlikely(clint_time_val == NULL)) - return 0; + return random_get_entropy_fallback(); return get_cycles(); } #define random_get_entropy() random_get_entropy() diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c index 024159298231..1aa540350abd 100644 --- a/arch/riscv/kernel/efi.c +++ b/arch/riscv/kernel/efi.c @@ -65,7 +65,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) if (md->attribute & EFI_MEMORY_RO) { val = pte_val(pte) & ~_PAGE_WRITE; - val = pte_val(pte) | _PAGE_READ; + val |= _PAGE_READ; pte = __pte(val); } if (md->attribute & EFI_MEMORY_XP) { diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 1a819c18bede..47d1411db0a9 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -261,6 +261,7 @@ clear_bss_done: REG_S a0, (a2) /* Initialize page tables and relocate to virtual addresses */ + la tp, init_task la sp, init_thread_union + THREAD_SIZE mv a0, s1 call setup_vm diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 896b68e541b2..878993982e39 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -507,7 +507,6 @@ config KEXEC config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE - select BUILD_BIN2C depends on CRYPTO depends on CRYPTO_SHA256 depends on CRYPTO_SHA256_S390 diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 54c7536f2482..1023e9d43d44 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -701,7 +701,7 @@ static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, unsigned int nbytes) { gw->walk_bytes_remain -= nbytes; - scatterwalk_unmap(&gw->walk); + scatterwalk_unmap(gw->walk_ptr); scatterwalk_advance(&gw->walk, nbytes); scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); gw->walk_ptr = NULL; @@ -776,7 +776,7 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) goto out; } - scatterwalk_unmap(&gw->walk); + scatterwalk_unmap(gw->walk_ptr); gw->walk_ptr = NULL; gw->ptr = gw->buf; diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 4cbb4b6d85a8..1f2d40993c4d 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -2,126 +2,17 @@ /* * s390 arch random implementation. * - * Copyright IBM Corp. 2017, 2018 + * Copyright IBM Corp. 2017, 2020 * Author(s): Harald Freudenberger - * - * The s390_arch_random_generate() function may be called from random.c - * in interrupt context. So this implementation does the best to be very - * fast. There is a buffer of random data which is asynchronously checked - * and filled by a workqueue thread. - * If there are enough bytes in the buffer the s390_arch_random_generate() - * just delivers these bytes. Otherwise false is returned until the - * worker thread refills the buffer. - * The worker fills the rng buffer by pulling fresh entropy from the - * high quality (but slow) true hardware random generator. This entropy - * is then spread over the buffer with an pseudo random generator PRNG. - * As the arch_get_random_seed_long() fetches 8 bytes and the calling - * function add_interrupt_randomness() counts this as 1 bit entropy the - * distribution needs to make sure there is in fact 1 bit entropy contained - * in 8 bytes of the buffer. The current values pull 32 byte entropy - * and scatter this into a 2048 byte buffer. So 8 byte in the buffer - * will contain 1 bit of entropy. - * The worker thread is rescheduled based on the charge level of the - * buffer but at least with 500 ms delay to avoid too much CPU consumption. - * So the max. amount of rng data delivered via arch_get_random_seed is - * limited to 4k bytes per second. */ #include #include #include -#include #include -#include #include DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); EXPORT_SYMBOL(s390_arch_random_counter); - -#define ARCH_REFILL_TICKS (HZ/2) -#define ARCH_PRNG_SEED_SIZE 32 -#define ARCH_RNG_BUF_SIZE 2048 - -static DEFINE_SPINLOCK(arch_rng_lock); -static u8 *arch_rng_buf; -static unsigned int arch_rng_buf_idx; - -static void arch_rng_refill_buffer(struct work_struct *); -static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer); - -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes) -{ - /* max hunk is ARCH_RNG_BUF_SIZE */ - if (nbytes > ARCH_RNG_BUF_SIZE) - return false; - - /* lock rng buffer */ - if (!spin_trylock(&arch_rng_lock)) - return false; - - /* try to resolve the requested amount of bytes from the buffer */ - arch_rng_buf_idx -= nbytes; - if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) { - memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes); - atomic64_add(nbytes, &s390_arch_random_counter); - spin_unlock(&arch_rng_lock); - return true; - } - - /* not enough bytes in rng buffer, refill is done asynchronously */ - spin_unlock(&arch_rng_lock); - - return false; -} -EXPORT_SYMBOL(s390_arch_random_generate); - -static void arch_rng_refill_buffer(struct work_struct *unused) -{ - unsigned int delay = ARCH_REFILL_TICKS; - - spin_lock(&arch_rng_lock); - if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) { - /* buffer is exhausted and needs refill */ - u8 seed[ARCH_PRNG_SEED_SIZE]; - u8 prng_wa[240]; - /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */ - cpacf_trng(NULL, 0, seed, sizeof(seed)); - /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */ - memset(prng_wa, 0, sizeof(prng_wa)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, - &prng_wa, NULL, 0, seed, sizeof(seed)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, - &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0); - arch_rng_buf_idx = ARCH_RNG_BUF_SIZE; - } - delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE; - spin_unlock(&arch_rng_lock); - - /* kick next check */ - queue_delayed_work(system_long_wq, &arch_rng_work, delay); -} - -static int __init s390_arch_random_init(void) -{ - /* all the needed PRNO subfunctions available ? */ - if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) && - cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { - - /* alloc arch random working buffer */ - arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL); - if (!arch_rng_buf) - return -ENOMEM; - - /* kick worker queue job to fill the random buffer */ - queue_delayed_work(system_long_wq, - &arch_rng_work, ARCH_REFILL_TICKS); - - /* enable arch random to the outside world */ - static_branch_enable(&s390_arch_random_available); - } - - return 0; -} -arch_initcall(s390_arch_random_init); diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index de61ce562052..4120c428dc37 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -2,7 +2,7 @@ /* * Kernel interface for the s390 arch_random_* functions * - * Copyright IBM Corp. 2017 + * Copyright IBM Corp. 2017, 2022 * * Author: Harald Freudenberger * @@ -14,13 +14,13 @@ #ifdef CONFIG_ARCH_RANDOM #include +#include #include +#include DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); - static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; @@ -33,16 +33,22 @@ static inline bool __must_check arch_get_random_int(unsigned int *v) static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { - if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 7f3c9ac34bd8..63098df81c9f 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -9,6 +9,8 @@ #ifndef _S390_KEXEC_H #define _S390_KEXEC_H +#include + #include #include #include @@ -83,4 +85,12 @@ struct kimage_arch { extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif /*_S390_KEXEC_H */ diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index b5f545db461a..60e101b8460c 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -46,10 +46,17 @@ static inline bool test_preempt_need_resched(void) static inline void __preempt_count_add(int val) { - if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) - __atomic_add_const(val, &S390_lowcore.preempt_count); - else - __atomic_add(val, &S390_lowcore.preempt_count); + /* + * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES + * enabled, gcc 12 fails to handle __builtin_constant_p(). + */ + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) { + if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) { + __atomic_add_const(val, &S390_lowcore.preempt_count); + return; + } + } + __atomic_add(val, &S390_lowcore.preempt_count); } static inline void __preempt_count_sub(int val) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 289aaff4d365..588aa0f2c842 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -172,6 +172,7 @@ static inline cycles_t get_cycles(void) { return (cycles_t) get_tod_clock() >> 2; } +#define get_cycles get_cycles int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 0eb1d1cc53a8..dddb32e53db8 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -292,6 +292,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) return err; } +/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different + * attribute::type values: + * - PERF_TYPE_HARDWARE: + * - pmu->type: + * Handle both type of invocations identical. They address the same hardware. + * The result is different when event modifiers exclude_kernel and/or + * exclude_user are also set. + */ +static int cpumf_pmu_event_type(struct perf_event *event) +{ + u64 ev = event->attr.config; + + if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) + return PERF_TYPE_HARDWARE; + return PERF_TYPE_RAW; +} + static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; @@ -301,7 +321,7 @@ static int cpumf_pmu_event_init(struct perf_event *event) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ - err = __hw_perf_event_init(event, PERF_TYPE_RAW); + err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); else return -ENOENT; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 1e75cc983546..b922dc0c8130 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -51,7 +51,7 @@ static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) if (!stack) return NULL; - return (struct kvm_s390_sie_block *) stack->empty1[0]; + return (struct kvm_s390_sie_block *)stack->empty1[1]; } static bool is_in_guest(struct pt_regs *regs) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f9f8721dc532..520cf5a152cf 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1009,6 +1009,11 @@ static void __init setup_randomness(void) if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free((unsigned long) vmms, PAGE_SIZE); + +#ifdef CONFIG_ARCH_RANDOM + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); +#endif } /* diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index f2d19d40272c..2db097c14cec 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2596,6 +2596,18 @@ static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr, return 0; } +/* + * Give a chance to schedule after setting a key to 256 pages. + * We only hold the mm lock, which is a rwsem and the kvm srcu. + * Both can sleep. + */ +static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + cond_resched(); + return 0; +} + static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, unsigned long hmask, unsigned long next, struct mm_walk *walk) @@ -2618,12 +2630,14 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, end = start + HPAGE_SIZE - 1; __storage_key_init_range(start, end); set_bit(PG_arch_1, &page->flags); + cond_resched(); return 0; } static const struct mm_walk_ops enable_skey_walk_ops = { .hugetlb_entry = __s390_enable_skey_hugetlb, .pte_entry = __s390_enable_skey_pte, + .pmd_entry = __s390_enable_skey_pmd, }; int s390_enable_skey(void) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index fabaedddc90c..1c05caf68e7d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -734,7 +734,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) - page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); + page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); pgste_set_unlock(ptep, pgste); preempt_enable(); } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e14e4a3a647a..74799439b259 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -69,6 +69,7 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) list_for_each_entry(tmp, &zpci_list, entry) { if (tmp->fid == fid) { zdev = tmp; + zpci_zdev_get(zdev); break; } } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index 55c9488e504c..8d2fcd091ca7 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -13,7 +13,8 @@ void zpci_bus_device_unregister(struct zpci_dev *zdev); void zpci_release_device(struct kref *kref); static inline void zpci_zdev_put(struct zpci_dev *zdev) { - kref_put(&zdev->kref, zpci_release_device); + if (zdev) + kref_put(&zdev->kref, zpci_release_device); } static inline void zpci_zdev_get(struct zpci_dev *zdev) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 0a0e8b8293be..d1a5c80a41cb 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -22,6 +22,8 @@ #include #include +#include "pci_bus.h" + bool zpci_unique_uid; void update_uid_checking(bool new) @@ -372,8 +374,11 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; zdev = get_zdev_by_fid(entry->fid); - if (!zdev) - zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (zdev) { + zpci_zdev_put(zdev); + return; + } + zpci_create_device(entry->fid, entry->fh, entry->config_state); } int clp_scan_pci_devices(void) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b7cfde7e80a8..6ced44b5be8a 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -61,10 +61,12 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); if (!pdev) - return; + goto no_pdev; pdev->error_state = pci_channel_io_perm_failure; pci_dev_put(pdev); +no_pdev: + zpci_zdev_put(zdev); } void zpci_event_error(void *data) @@ -76,6 +78,7 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); + bool existing_zdev = !!zdev; enum zpci_state state; struct pci_dev *pdev; int ret; @@ -161,6 +164,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } + if (existing_zdev) + zpci_zdev_put(zdev); } void zpci_event_availability(void *data) diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 6d5c6463bc07..de99a19e72d7 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -271,8 +271,12 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, #endif /* CONFIG_HAVE_IOREMAP_PROT */ #else /* CONFIG_MMU */ -#define iounmap(addr) do { } while (0) -#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset)) +static inline void __iomem *ioremap(phys_addr_t offset, size_t size) +{ + return (void __iomem *)(unsigned long)offset; +} + +static inline void iounmap(volatile void __iomem *addr) { } #endif /* CONFIG_MMU */ #define ioremap_uc ioremap diff --git a/arch/sparc/include/asm/timex_32.h b/arch/sparc/include/asm/timex_32.h index 542915b46209..f86326a6f89e 100644 --- a/arch/sparc/include/asm/timex_32.h +++ b/arch/sparc/include/asm/timex_32.h @@ -9,8 +9,6 @@ #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -/* XXX Maybe do something better at some point... -DaveM */ -typedef unsigned long cycles_t; -#define get_cycles() (0) +#include #endif diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 6040817c036f..25727ed648b7 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -220,7 +220,7 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, unsigned long *stack_out) { struct winch_data data; - int fds[2], n, err; + int fds[2], n, err, pid; char c; err = os_pipe(fds, 1, 1); @@ -238,8 +238,9 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, * problem with /dev/net/tun, which if held open by this * thread, prevents the TUN/TAP device from being reused. */ - err = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); - if (err < 0) { + pid = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); + if (pid < 0) { + err = pid; printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n", -err); goto out_close; @@ -263,7 +264,7 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, goto out_close; } - return err; + return pid; out_close: close(fds[1]); diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 4c19ce4c49f1..66ab6a07330b 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -63,6 +63,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 #define TIF_SECCOMP 9 /* secure computing */ +#define TIF_SINGLESTEP 10 /* single stepping userspace */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -70,5 +71,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #endif diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h index e392a9a5bc9b..9f27176adb26 100644 --- a/arch/um/include/asm/timex.h +++ b/arch/um/include/asm/timex.h @@ -2,13 +2,8 @@ #ifndef __UM_TIMEX_H #define __UM_TIMEX_H -typedef unsigned long cycles_t; - -static inline cycles_t get_cycles (void) -{ - return 0; -} - #define CLOCK_TICK_RATE (HZ) +#include + #endif diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index e8fd5d540b05..7f7a74c82abb 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -44,7 +44,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; - current->ptrace &= ~PT_DTRACE; + clear_thread_flag(TIF_SINGLESTEP); #ifdef SUBARCH_EXECVE1 SUBARCH_EXECVE1(regs->regs); #endif diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 9505a7e87396..8eb8b736abc1 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -341,7 +341,7 @@ int singlestepping(void * t) { struct task_struct *task = t ? t : current; - if (!(task->ptrace & PT_DTRACE)) + if (!test_thread_flag(TIF_SINGLESTEP)) return 0; if (task->thread.singlestep_syscall) diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index b425f47bddbb..d37802ced563 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -12,7 +12,7 @@ void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_DTRACE; + set_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -22,7 +22,7 @@ void user_enable_single_step(struct task_struct *child) void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_DTRACE; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -121,7 +121,7 @@ static void send_sigtrap(struct uml_pt_regs *regs, int error_code) } /* - * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and + * XXX Check TIF_SINGLESTEP for singlestepping check and * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check */ int syscall_trace_enter(struct pt_regs *regs) @@ -145,7 +145,7 @@ void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(regs); /* Fake a debug trap */ - if (ptraced & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) send_sigtrap(®s->regs, 0); if (!test_thread_flag(TIF_SYSCALL_TRACE)) diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 88cd9b5c1b74..ae4658f576ab 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -53,7 +53,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) unsigned long sp; int err; - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED)) singlestep = 1; /* Did we come from a system call? */ @@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs) * on the host. The tracing thread will check this flag and * PTRACE_SYSCALL if necessary. */ - if (current->ptrace & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) current->thread.singlestep_syscall = is_syscall(PT_REGS_IP(¤t->thread.regs)); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 76b37297b7d4..26af24b5d900 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -358,6 +358,14 @@ void __init check_bugs(void) os_check_bugs(); } +void apply_retpolines(s32 *start, s32 *end) +{ +} + +void apply_returns(s32 *start, s32 *end) +{ +} + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9161ee0640b6..cae5e879a0db 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -460,15 +460,6 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH -config RETPOLINE - bool "Avoid speculative indirect branches in kernel" - default y - help - Compile kernel with the retpoline compiler options to guard against - kernel-to-user data leaks by avoiding speculative indirect - branches. Requires a compiler with -mindirect-branch=thunk-extern - support for full protection. The kernel may run slower. - config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) @@ -1328,7 +1319,7 @@ config MICROCODE config MICROCODE_INTEL bool "Intel microcode loading support" - depends on MICROCODE + depends on CPU_SUP_INTEL && MICROCODE default MICROCODE help This options enables microcode patch loading support for Intel @@ -1340,7 +1331,7 @@ config MICROCODE_INTEL config MICROCODE_AMD bool "AMD microcode loading support" - depends on MICROCODE + depends on CPU_SUP_AMD && MICROCODE help If you select this option, microcode patch loading support for AMD processors will be enabled. @@ -2422,6 +2413,88 @@ source "kernel/livepatch/Kconfig" endmenu +config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + +config CC_HAS_RETURN_THUNK + def_bool $(cc-option,-mfunction-return=thunk-extern) + +menuconfig SPECULATION_MITIGATIONS + bool "Mitigations for speculative execution vulnerabilities" + default y + help + Say Y here to enable options which enable mitigations for + speculative execution hardware vulnerabilities. + + If you say N, all mitigations will be disabled. You really + should know what you are doing to say so. + +if SPECULATION_MITIGATIONS + +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on (X86_64 || X86_PAE) + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pti.rst for more details. + +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + +config RETHUNK + bool "Enable return-thunks" + depends on RETPOLINE && CC_HAS_RETURN_THUNK + default y if X86_64 + help + Compile the kernel with the return-thunks compiler option to guard + against kernel-to-user data leaks by avoiding return speculation. + Requires a compiler with -mfunction-return=thunk-extern + support for full protection. The kernel may run slower. + +config CPU_UNRET_ENTRY + bool "Enable UNRET on kernel entry" + depends on CPU_SUP_AMD && RETHUNK && X86_64 + default y + help + Compile the kernel with support for the retbleed=unret mitigation. + +config CPU_IBPB_ENTRY + bool "Enable IBPB on kernel entry" + depends on CPU_SUP_AMD && X86_64 + default y + help + Compile the kernel with support for the retbleed=ibpb mitigation. + +config CPU_IBRS_ENTRY + bool "Enable IBRS on kernel entry" + depends on CPU_SUP_INTEL && X86_64 + default y + help + Compile the kernel with support for the spectre_v2=ibrs mitigation. + This mitigates both spectre_v2 and retbleed at great cost to + performance. + +config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 + default n + help + Compile the kernel with straight-line-speculation options to guard + against straight line speculation. The kernel image might be slightly + larger. + +endif + config ARCH_HAS_ADD_PAGES def_bool y depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c824dc6acea4..616cc9aec021 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -31,7 +31,7 @@ endif CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS)) -REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \ +REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) @@ -196,6 +196,10 @@ ifdef CONFIG_RETPOLINE endif endif +ifdef CONFIG_SLS + KBUILD_CFLAGS += -mharden-sls=all +endif + KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) ifdef CONFIG_LTO_CLANG diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index c4bb0f9363f5..c8052a141b08 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -89,7 +89,7 @@ SYM_FUNC_START(__efi64_thunk) pop %rbx pop %rbp - ret + RET SYM_FUNC_END(__efi64_thunk) .code32 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 72f655c238cf..b55e2007b30c 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -786,7 +786,7 @@ SYM_FUNC_START(efi32_pe_entry) 2: popl %edi // restore callee-save registers popl %ebx leave - ret + RET SYM_FUNC_END(efi32_pe_entry) .section ".rodata" @@ -868,7 +868,7 @@ SYM_FUNC_START(startup32_check_sev_cbit) popl %ebx popl %eax #endif - ret + RET SYM_FUNC_END(startup32_check_sev_cbit) /* diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index a6dea4e8a082..484a9c06f41d 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -58,7 +58,7 @@ SYM_FUNC_START(get_sev_encryption_bit) #endif /* CONFIG_AMD_MEM_ENCRYPT */ - ret + RET SYM_FUNC_END(get_sev_encryption_bit) .code64 @@ -99,7 +99,7 @@ SYM_FUNC_START(set_sev_encryption_mask) #endif xor %rax, %rax - ret + RET SYM_FUNC_END(set_sev_encryption_mask) .data diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index a31de0c6ccde..66f25c2938bf 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -66,7 +66,9 @@ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o -blake2s-x86_64-y := blake2s-core.o blake2s-glue.o +blake2s-x86_64-y := blake2s-shash.o +obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o +libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 51d46d93efbc..b48ddebb4748 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -122,7 +122,7 @@ SYM_FUNC_START_LOCAL(__load_partial) pxor T0, MSG .Lld_partial_8: - ret + RET SYM_FUNC_END(__load_partial) /* @@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(__store_partial) mov %r10b, (%r9) .Lst_partial_1: - ret + RET SYM_FUNC_END(__store_partial) /* @@ -225,7 +225,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_init) movdqu STATE4, 0x40(STATEP) FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_init) /* @@ -337,7 +337,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) FRAME_END - ret + RET .Lad_out_1: movdqu STATE4, 0x00(STATEP) @@ -346,7 +346,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET .Lad_out_2: movdqu STATE3, 0x00(STATEP) @@ -355,7 +355,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) FRAME_END - ret + RET .Lad_out_3: movdqu STATE2, 0x00(STATEP) @@ -364,7 +364,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) FRAME_END - ret + RET .Lad_out_4: movdqu STATE1, 0x00(STATEP) @@ -373,11 +373,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) FRAME_END - ret + RET .Lad_out: FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_ad) .macro encrypt_block a s0 s1 s2 s3 s4 i @@ -452,7 +452,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out_1: movdqu STATE3, 0x00(STATEP) @@ -461,7 +461,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out_2: movdqu STATE2, 0x00(STATEP) @@ -470,7 +470,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out_3: movdqu STATE1, 0x00(STATEP) @@ -479,7 +479,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out_4: movdqu STATE0, 0x00(STATEP) @@ -488,11 +488,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) FRAME_END - ret + RET .Lenc_out: FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_enc) /* @@ -532,7 +532,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) .macro decrypt_block a s0 s1 s2 s3 s4 i @@ -606,7 +606,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out_1: movdqu STATE3, 0x00(STATEP) @@ -615,7 +615,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out_2: movdqu STATE2, 0x00(STATEP) @@ -624,7 +624,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out_3: movdqu STATE1, 0x00(STATEP) @@ -633,7 +633,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out_4: movdqu STATE0, 0x00(STATEP) @@ -642,11 +642,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) FRAME_END - ret + RET .Ldec_out: FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_dec) /* @@ -696,7 +696,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) movdqu STATE3, 0x40(STATEP) FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) /* @@ -743,5 +743,5 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) movdqu MSG, (%rsi) FRAME_END - ret + RET SYM_FUNC_END(crypto_aegis128_aesni_final) diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index 3f0fc7dd87d7..c799838242a6 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -525,7 +525,7 @@ ddq_add_8: /* return updated IV */ vpshufb xbyteswap, xcounter, xcounter vmovdqu xcounter, (p_iv) - ret + RET .endm /* diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 57aef3f5a81e..69c7c0dc22ea 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1598,7 +1598,7 @@ SYM_FUNC_START(aesni_gcm_dec) GCM_ENC_DEC dec GCM_COMPLETE arg10, arg11 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_dec) @@ -1687,7 +1687,7 @@ SYM_FUNC_START(aesni_gcm_enc) GCM_COMPLETE arg10, arg11 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_enc) /***************************************************************************** @@ -1705,7 +1705,7 @@ SYM_FUNC_START(aesni_gcm_init) FUNC_SAVE GCM_INIT %arg3, %arg4,%arg5, %arg6 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_init) /***************************************************************************** @@ -1720,7 +1720,7 @@ SYM_FUNC_START(aesni_gcm_enc_update) FUNC_SAVE GCM_ENC_DEC enc FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_enc_update) /***************************************************************************** @@ -1735,7 +1735,7 @@ SYM_FUNC_START(aesni_gcm_dec_update) FUNC_SAVE GCM_ENC_DEC dec FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_dec_update) /***************************************************************************** @@ -1750,7 +1750,7 @@ SYM_FUNC_START(aesni_gcm_finalize) FUNC_SAVE GCM_COMPLETE %arg3 %arg4 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_finalize) #endif @@ -1766,7 +1766,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a) pxor %xmm1, %xmm0 movaps %xmm0, (TKEYP) add $0x10, TKEYP - ret + RET SYM_FUNC_END(_key_expansion_256a) SYM_FUNC_END_ALIAS(_key_expansion_128) @@ -1791,7 +1791,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192a) shufps $0b01001110, %xmm2, %xmm1 movaps %xmm1, 0x10(TKEYP) add $0x20, TKEYP - ret + RET SYM_FUNC_END(_key_expansion_192a) SYM_FUNC_START_LOCAL(_key_expansion_192b) @@ -1810,7 +1810,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_192b) movaps %xmm0, (TKEYP) add $0x10, TKEYP - ret + RET SYM_FUNC_END(_key_expansion_192b) SYM_FUNC_START_LOCAL(_key_expansion_256b) @@ -1822,7 +1822,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256b) pxor %xmm1, %xmm2 movaps %xmm2, (TKEYP) add $0x10, TKEYP - ret + RET SYM_FUNC_END(_key_expansion_256b) /* @@ -1937,7 +1937,7 @@ SYM_FUNC_START(aesni_set_key) popl KEYP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_set_key) /* @@ -1961,7 +1961,7 @@ SYM_FUNC_START(aesni_enc) popl KEYP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_enc) /* @@ -2018,7 +2018,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc1) aesenc KEY, STATE movaps 0x70(TKEYP), KEY aesenclast KEY, STATE - ret + RET SYM_FUNC_END(_aesni_enc1) /* @@ -2126,7 +2126,7 @@ SYM_FUNC_START_LOCAL(_aesni_enc4) aesenclast KEY, STATE2 aesenclast KEY, STATE3 aesenclast KEY, STATE4 - ret + RET SYM_FUNC_END(_aesni_enc4) /* @@ -2151,7 +2151,7 @@ SYM_FUNC_START(aesni_dec) popl KEYP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_dec) /* @@ -2208,7 +2208,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec1) aesdec KEY, STATE movaps 0x70(TKEYP), KEY aesdeclast KEY, STATE - ret + RET SYM_FUNC_END(_aesni_dec1) /* @@ -2316,7 +2316,7 @@ SYM_FUNC_START_LOCAL(_aesni_dec4) aesdeclast KEY, STATE2 aesdeclast KEY, STATE3 aesdeclast KEY, STATE4 - ret + RET SYM_FUNC_END(_aesni_dec4) /* @@ -2376,7 +2376,7 @@ SYM_FUNC_START(aesni_ecb_enc) popl LEN #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_ecb_enc) /* @@ -2437,7 +2437,7 @@ SYM_FUNC_START(aesni_ecb_dec) popl LEN #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_ecb_dec) /* @@ -2481,7 +2481,7 @@ SYM_FUNC_START(aesni_cbc_enc) popl IVP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_cbc_enc) /* @@ -2574,7 +2574,7 @@ SYM_FUNC_START(aesni_cbc_dec) popl IVP #endif FRAME_END - ret + RET SYM_FUNC_END(aesni_cbc_dec) #ifdef __x86_64__ @@ -2602,7 +2602,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc_init) mov $1, TCTR_LOW movq TCTR_LOW, INC movq CTR, TCTR_LOW - ret + RET SYM_FUNC_END(_aesni_inc_init) /* @@ -2630,7 +2630,7 @@ SYM_FUNC_START_LOCAL(_aesni_inc) .Linc_low: movaps CTR, IV pshufb BSWAP_MASK, IV - ret + RET SYM_FUNC_END(_aesni_inc) /* @@ -2693,7 +2693,7 @@ SYM_FUNC_START(aesni_ctr_enc) movups IV, (IVP) .Lctr_enc_just_ret: FRAME_END - ret + RET SYM_FUNC_END(aesni_ctr_enc) /* @@ -2778,7 +2778,7 @@ SYM_FUNC_START(aesni_xts_encrypt) movups IV, (IVP) FRAME_END - ret + RET SYM_FUNC_END(aesni_xts_encrypt) /* @@ -2846,7 +2846,7 @@ SYM_FUNC_START(aesni_xts_decrypt) movups IV, (IVP) FRAME_END - ret + RET SYM_FUNC_END(aesni_xts_decrypt) #endif diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 2cf8e94d986a..4d9b2f887064 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -1777,7 +1777,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen2) FUNC_SAVE INIT GHASH_MUL_AVX, PRECOMPUTE_AVX FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_init_avx_gen2) ############################################################################### @@ -1798,15 +1798,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2) # must be 192 GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11 FUNC_RESTORE - ret + RET key_128_enc_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9 FUNC_RESTORE - ret + RET key_256_enc_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2) ############################################################################### @@ -1827,15 +1827,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2) # must be 192 GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11 FUNC_RESTORE - ret + RET key_128_dec_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9 FUNC_RESTORE - ret + RET key_256_dec_update: GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2) ############################################################################### @@ -1856,15 +1856,15 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_gen2) # must be 192 GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4 FUNC_RESTORE - ret + RET key_128_finalize: GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4 FUNC_RESTORE - ret + RET key_256_finalize: GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) ############################################################################### @@ -2745,7 +2745,7 @@ SYM_FUNC_START(aesni_gcm_init_avx_gen4) FUNC_SAVE INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_init_avx_gen4) ############################################################################### @@ -2766,15 +2766,15 @@ SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4) # must be 192 GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11 FUNC_RESTORE - ret + RET key_128_enc_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9 FUNC_RESTORE - ret + RET key_256_enc_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4) ############################################################################### @@ -2795,15 +2795,15 @@ SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4) # must be 192 GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11 FUNC_RESTORE - ret + RET key_128_dec_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9 FUNC_RESTORE - ret + RET key_256_dec_update4: GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4) ############################################################################### @@ -2824,13 +2824,13 @@ SYM_FUNC_START(aesni_gcm_finalize_avx_gen4) # must be 192 GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4 FUNC_RESTORE - ret + RET key_128_finalize4: GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4 FUNC_RESTORE - ret + RET key_256_finalize4: GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4 FUNC_RESTORE - ret + RET SYM_FUNC_END(aesni_gcm_finalize_avx_gen4) diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S index 2ca79974f819..b50b35ff1fdb 100644 --- a/arch/x86/crypto/blake2s-core.S +++ b/arch/x86/crypto/blake2s-core.S @@ -171,7 +171,7 @@ SYM_FUNC_START(blake2s_compress_ssse3) movdqu %xmm1,0x10(%rdi) movdqu %xmm14,0x20(%rdi) .Lendofloop: - ret + RET SYM_FUNC_END(blake2s_compress_ssse3) #ifdef CONFIG_AS_AVX512 @@ -251,6 +251,6 @@ SYM_FUNC_START(blake2s_compress_avx512) vmovdqu %xmm1,0x10(%rdi) vmovdqu %xmm4,0x20(%rdi) vzeroupper - retq + RET SYM_FUNC_END(blake2s_compress_avx512) #endif /* CONFIG_AS_AVX512 */ diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index a40365ab301e..69853c13e8fb 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -28,9 +27,8 @@ asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); -void blake2s_compress_arch(struct blake2s_state *state, - const u8 *block, size_t nblocks, - const u32 inc) +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) { /* SIMD disables preemption, so relax after processing each page. */ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); @@ -56,49 +54,12 @@ void blake2s_compress_arch(struct blake2s_state *state, block += blocks * BLAKE2S_BLOCK_SIZE; } while (nblocks); } -EXPORT_SYMBOL(blake2s_compress_arch); - -static int crypto_blake2s_update_x86(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch); -} - -static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) -{ - return crypto_blake2s_final(desc, out, blake2s_compress_arch); -} - -#define BLAKE2S_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2s_setkey, \ - .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update_x86, \ - .final = crypto_blake2s_final_x86, \ - .descsize = sizeof(struct blake2s_state), \ - } - -static struct shash_alg blake2s_algs[] = { - BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE), - BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE), - BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE), - BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE), -}; +EXPORT_SYMBOL(blake2s_compress); static int __init blake2s_mod_init(void) { - if (!boot_cpu_has(X86_FEATURE_SSSE3)) - return 0; - - static_branch_enable(&blake2s_use_ssse3); + if (boot_cpu_has(X86_FEATURE_SSSE3)) + static_branch_enable(&blake2s_use_ssse3); if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) && @@ -109,26 +70,9 @@ static int __init blake2s_mod_init(void) XFEATURE_MASK_AVX512, NULL)) static_branch_enable(&blake2s_use_avx512); - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? - crypto_register_shashes(blake2s_algs, - ARRAY_SIZE(blake2s_algs)) : 0; -} - -static void __exit blake2s_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) - crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); + return 0; } module_init(blake2s_mod_init); -module_exit(blake2s_mod_exit); -MODULE_ALIAS_CRYPTO("blake2s-128"); -MODULE_ALIAS_CRYPTO("blake2s-128-x86"); -MODULE_ALIAS_CRYPTO("blake2s-160"); -MODULE_ALIAS_CRYPTO("blake2s-160-x86"); -MODULE_ALIAS_CRYPTO("blake2s-224"); -MODULE_ALIAS_CRYPTO("blake2s-224-x86"); -MODULE_ALIAS_CRYPTO("blake2s-256"); -MODULE_ALIAS_CRYPTO("blake2s-256-x86"); MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c new file mode 100644 index 000000000000..59ae28abe35c --- /dev/null +++ b/arch/x86/crypto/blake2s-shash.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +static int crypto_blake2s_update_x86(struct shash_desc *desc, + const u8 *in, unsigned int inlen) +{ + return crypto_blake2s_update(desc, in, inlen, false); +} + +static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) +{ + return crypto_blake2s_final(desc, out, false); +} + +#define BLAKE2S_ALG(name, driver_name, digest_size) \ + { \ + .base.cra_name = name, \ + .base.cra_driver_name = driver_name, \ + .base.cra_priority = 200, \ + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ + .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ + .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ + .base.cra_module = THIS_MODULE, \ + .digestsize = digest_size, \ + .setkey = crypto_blake2s_setkey, \ + .init = crypto_blake2s_init, \ + .update = crypto_blake2s_update_x86, \ + .final = crypto_blake2s_final_x86, \ + .descsize = sizeof(struct blake2s_state), \ + } + +static struct shash_alg blake2s_algs[] = { + BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE), + BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE), + BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE), + BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE), +}; + +static int __init blake2s_mod_init(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); + return 0; +} + +static void __exit blake2s_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); +} + +module_init(blake2s_mod_init); +module_exit(blake2s_mod_exit); + +MODULE_ALIAS_CRYPTO("blake2s-128"); +MODULE_ALIAS_CRYPTO("blake2s-128-x86"); +MODULE_ALIAS_CRYPTO("blake2s-160"); +MODULE_ALIAS_CRYPTO("blake2s-160-x86"); +MODULE_ALIAS_CRYPTO("blake2s-224"); +MODULE_ALIAS_CRYPTO("blake2s-224-x86"); +MODULE_ALIAS_CRYPTO("blake2s-256"); +MODULE_ALIAS_CRYPTO("blake2s-256-x86"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 4222ac6d6584..802d71582689 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -135,10 +135,10 @@ SYM_FUNC_START(__blowfish_enc_blk) jnz .L__enc_xor; write_block(); - ret; + RET; .L__enc_xor: xor_block(); - ret; + RET; SYM_FUNC_END(__blowfish_enc_blk) SYM_FUNC_START(blowfish_dec_blk) @@ -170,7 +170,7 @@ SYM_FUNC_START(blowfish_dec_blk) movq %r11, %r12; - ret; + RET; SYM_FUNC_END(blowfish_dec_blk) /********************************************************************** @@ -322,14 +322,14 @@ SYM_FUNC_START(__blowfish_enc_blk_4way) popq %rbx; popq %r12; - ret; + RET; .L__enc_xor4: xor_block4(); popq %rbx; popq %r12; - ret; + RET; SYM_FUNC_END(__blowfish_enc_blk_4way) SYM_FUNC_START(blowfish_dec_blk_4way) @@ -364,5 +364,5 @@ SYM_FUNC_START(blowfish_dec_blk_4way) popq %rbx; popq %r12; - ret; + RET; SYM_FUNC_END(blowfish_dec_blk_4way) diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index ecc0a9a905c4..297b1a7ed2bc 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -193,7 +193,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %rcx, (%r9)); - ret; + RET; SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) .align 8 @@ -201,7 +201,7 @@ SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_a roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, %rax, (%r9)); - ret; + RET; SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) /* @@ -787,7 +787,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16) %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax)); FRAME_END - ret; + RET; .align 8 .Lenc_max32: @@ -874,7 +874,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk16) %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax)); FRAME_END - ret; + RET; .align 8 .Ldec_max32: @@ -915,7 +915,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way) %xmm8, %rsi); FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ecb_enc_16way) SYM_FUNC_START(camellia_ecb_dec_16way) @@ -945,7 +945,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way) %xmm8, %rsi); FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ecb_dec_16way) SYM_FUNC_START(camellia_cbc_dec_16way) @@ -996,7 +996,7 @@ SYM_FUNC_START(camellia_cbc_dec_16way) %xmm8, %rsi); FRAME_END - ret; + RET; SYM_FUNC_END(camellia_cbc_dec_16way) #define inc_le128(x, minus_one, tmp) \ @@ -1109,7 +1109,7 @@ SYM_FUNC_START(camellia_ctr_16way) %xmm8, %rsi); FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ctr_16way) #define gf128mul_x_ble(iv, mask, tmp) \ @@ -1253,7 +1253,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way) %xmm8, %rsi); FRAME_END - ret; + RET; SYM_FUNC_END(camellia_xts_crypt_16way) SYM_FUNC_START(camellia_xts_enc_16way) diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 0907243c501c..288cd246da20 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -227,7 +227,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, %rcx, (%r9)); - ret; + RET; SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) .align 8 @@ -235,7 +235,7 @@ SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_a roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, %rax, (%r9)); - ret; + RET; SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) /* @@ -825,7 +825,7 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32) %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax)); FRAME_END - ret; + RET; .align 8 .Lenc_max32: @@ -912,7 +912,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk32) %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax)); FRAME_END - ret; + RET; .align 8 .Ldec_max32: @@ -957,7 +957,7 @@ SYM_FUNC_START(camellia_ecb_enc_32way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ecb_enc_32way) SYM_FUNC_START(camellia_ecb_dec_32way) @@ -991,7 +991,7 @@ SYM_FUNC_START(camellia_ecb_dec_32way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ecb_dec_32way) SYM_FUNC_START(camellia_cbc_dec_32way) @@ -1059,7 +1059,7 @@ SYM_FUNC_START(camellia_cbc_dec_32way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(camellia_cbc_dec_32way) #define inc_le128(x, minus_one, tmp) \ @@ -1199,7 +1199,7 @@ SYM_FUNC_START(camellia_ctr_32way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(camellia_ctr_32way) #define gf128mul_x_ble(iv, mask, tmp) \ @@ -1366,7 +1366,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(camellia_xts_crypt_32way) SYM_FUNC_START(camellia_xts_enc_32way) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 1372e6408850..347c059f5940 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -213,13 +213,13 @@ SYM_FUNC_START(__camellia_enc_blk) enc_outunpack(mov, RT1); movq RR12, %r12; - ret; + RET; .L__enc_xor: enc_outunpack(xor, RT1); movq RR12, %r12; - ret; + RET; SYM_FUNC_END(__camellia_enc_blk) SYM_FUNC_START(camellia_dec_blk) @@ -257,7 +257,7 @@ SYM_FUNC_START(camellia_dec_blk) dec_outunpack(); movq RR12, %r12; - ret; + RET; SYM_FUNC_END(camellia_dec_blk) /********************************************************************** @@ -448,14 +448,14 @@ SYM_FUNC_START(__camellia_enc_blk_2way) movq RR12, %r12; popq %rbx; - ret; + RET; .L__enc2_xor: enc_outunpack2(xor, RT2); movq RR12, %r12; popq %rbx; - ret; + RET; SYM_FUNC_END(__camellia_enc_blk_2way) SYM_FUNC_START(camellia_dec_blk_2way) @@ -495,5 +495,5 @@ SYM_FUNC_START(camellia_dec_blk_2way) movq RR12, %r12; movq RXOR, %rbx; - ret; + RET; SYM_FUNC_END(camellia_dec_blk_2way) diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 8a6181b08b59..b258af420c92 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -279,7 +279,7 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16) outunpack_blocks(RR3, RL3, RTMP, RX, RKM); outunpack_blocks(RR4, RL4, RTMP, RX, RKM); - ret; + RET; SYM_FUNC_END(__cast5_enc_blk16) .align 16 @@ -352,7 +352,7 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16) outunpack_blocks(RR3, RL3, RTMP, RX, RKM); outunpack_blocks(RR4, RL4, RTMP, RX, RKM); - ret; + RET; .L__skip_dec: vpsrldq $4, RKR, RKR; @@ -393,7 +393,7 @@ SYM_FUNC_START(cast5_ecb_enc_16way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast5_ecb_enc_16way) SYM_FUNC_START(cast5_ecb_dec_16way) @@ -431,7 +431,7 @@ SYM_FUNC_START(cast5_ecb_dec_16way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast5_ecb_dec_16way) SYM_FUNC_START(cast5_cbc_dec_16way) @@ -483,7 +483,7 @@ SYM_FUNC_START(cast5_cbc_dec_16way) popq %r15; popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(cast5_cbc_dec_16way) SYM_FUNC_START(cast5_ctr_16way) @@ -559,5 +559,5 @@ SYM_FUNC_START(cast5_ctr_16way) popq %r15; popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(cast5_ctr_16way) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 932a3ce32a88..6eccaf1fb4c6 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -291,7 +291,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8) outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); - ret; + RET; SYM_FUNC_END(__cast6_enc_blk8) .align 8 @@ -338,7 +338,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8) outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); - ret; + RET; SYM_FUNC_END(__cast6_dec_blk8) SYM_FUNC_START(cast6_ecb_enc_8way) @@ -361,7 +361,7 @@ SYM_FUNC_START(cast6_ecb_enc_8way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast6_ecb_enc_8way) SYM_FUNC_START(cast6_ecb_dec_8way) @@ -384,7 +384,7 @@ SYM_FUNC_START(cast6_ecb_dec_8way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast6_ecb_dec_8way) SYM_FUNC_START(cast6_cbc_dec_8way) @@ -410,7 +410,7 @@ SYM_FUNC_START(cast6_cbc_dec_8way) popq %r15; popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(cast6_cbc_dec_8way) SYM_FUNC_START(cast6_ctr_8way) @@ -438,7 +438,7 @@ SYM_FUNC_START(cast6_ctr_8way) popq %r15; popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(cast6_ctr_8way) SYM_FUNC_START(cast6_xts_enc_8way) @@ -465,7 +465,7 @@ SYM_FUNC_START(cast6_xts_enc_8way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast6_xts_enc_8way) SYM_FUNC_START(cast6_xts_dec_8way) @@ -492,5 +492,5 @@ SYM_FUNC_START(cast6_xts_dec_8way) popq %r15; FRAME_END - ret; + RET; SYM_FUNC_END(cast6_xts_dec_8way) diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S index ee9a40ab4109..f3d8fc018249 100644 --- a/arch/x86/crypto/chacha-avx2-x86_64.S +++ b/arch/x86/crypto/chacha-avx2-x86_64.S @@ -193,7 +193,7 @@ SYM_FUNC_START(chacha_2block_xor_avx2) .Ldone2: vzeroupper - ret + RET .Lxorpart2: # xor remaining bytes from partial register into output @@ -498,7 +498,7 @@ SYM_FUNC_START(chacha_4block_xor_avx2) .Ldone4: vzeroupper - ret + RET .Lxorpart4: # xor remaining bytes from partial register into output @@ -992,7 +992,7 @@ SYM_FUNC_START(chacha_8block_xor_avx2) .Ldone8: vzeroupper lea -8(%r10),%rsp - ret + RET .Lxorpart8: # xor remaining bytes from partial register into output diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S index bb193fde123a..259383e1ad44 100644 --- a/arch/x86/crypto/chacha-avx512vl-x86_64.S +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -166,13 +166,13 @@ SYM_FUNC_START(chacha_2block_xor_avx512vl) .Ldone2: vzeroupper - ret + RET .Lxorpart2: # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone2 mov %rax,%r9 and $~0xf,%r9 @@ -432,13 +432,13 @@ SYM_FUNC_START(chacha_4block_xor_avx512vl) .Ldone4: vzeroupper - ret + RET .Lxorpart4: # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone4 mov %rax,%r9 and $~0xf,%r9 @@ -812,7 +812,7 @@ SYM_FUNC_START(chacha_8block_xor_avx512vl) .Ldone8: vzeroupper - ret + RET .Lxorpart8: # xor remaining bytes from partial register into output diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S index ca1788bfee16..7111949cd5b9 100644 --- a/arch/x86/crypto/chacha-ssse3-x86_64.S +++ b/arch/x86/crypto/chacha-ssse3-x86_64.S @@ -108,7 +108,7 @@ SYM_FUNC_START_LOCAL(chacha_permute) sub $2,%r8d jnz .Ldoubleround - ret + RET SYM_FUNC_END(chacha_permute) SYM_FUNC_START(chacha_block_xor_ssse3) @@ -166,7 +166,7 @@ SYM_FUNC_START(chacha_block_xor_ssse3) .Ldone: FRAME_END - ret + RET .Lxorpart: # xor remaining bytes from partial register into output @@ -217,7 +217,7 @@ SYM_FUNC_START(hchacha_block_ssse3) movdqu %xmm3,0x10(%rsi) FRAME_END - ret + RET SYM_FUNC_END(hchacha_block_ssse3) SYM_FUNC_START(chacha_4block_xor_ssse3) @@ -762,7 +762,7 @@ SYM_FUNC_START(chacha_4block_xor_ssse3) .Ldone4: lea -8(%r10),%rsp - ret + RET .Lxorpart4: # xor remaining bytes from partial register into output diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S index 6e7d4c4d3208..c392a6edbfff 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -236,5 +236,5 @@ fold_64: pxor %xmm2, %xmm1 pextrd $0x01, %xmm1, %eax - ret + RET SYM_FUNC_END(crc32_pclmul_le_16) diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 884dc767b051..f6e3568fbd63 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -309,7 +309,7 @@ do_return: popq %rsi popq %rdi popq %rbx - ret + RET SYM_FUNC_END(crc_pcl) .section .rodata, "a", @progbits diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index b2533d63030e..721474abfb71 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -257,7 +257,7 @@ SYM_FUNC_START(crc_t10dif_pcl) # Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0. pextrw $0, %xmm0, %eax - ret + RET .align 16 .Lless_than_256_bytes: diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index fac0fdc3f25d..f4c760f4cade 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -243,7 +243,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk) popq %r12; popq %rbx; - ret; + RET; SYM_FUNC_END(des3_ede_x86_64_crypt_blk) /*********************************************************************** @@ -528,7 +528,7 @@ SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way) popq %r12; popq %rbx; - ret; + RET; SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way) .section .rodata, "a", @progbits diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 99ac25e18e09..2bf871899920 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -85,7 +85,7 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble) psrlq $1, T2 pxor T2, T1 pxor T1, DATA - ret + RET SYM_FUNC_END(__clmul_gf128mul_ble) /* void clmul_ghash_mul(char *dst, const u128 *shash) */ @@ -99,7 +99,7 @@ SYM_FUNC_START(clmul_ghash_mul) pshufb BSWAP, DATA movups DATA, (%rdi) FRAME_END - ret + RET SYM_FUNC_END(clmul_ghash_mul) /* @@ -128,5 +128,5 @@ SYM_FUNC_START(clmul_ghash_update) movups DATA, (%rdi) .Lupdate_just_ret: FRAME_END - ret + RET SYM_FUNC_END(clmul_ghash_update) diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S index b22c7b936272..6a0b15e7196a 100644 --- a/arch/x86/crypto/nh-avx2-x86_64.S +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -153,5 +153,5 @@ SYM_FUNC_START(nh_avx2) vpaddq T1, T0, T0 vpaddq T4, T0, T0 vmovdqu T0, (HASH) - ret + RET SYM_FUNC_END(nh_avx2) diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S index d7ae22dd6683..34c567bbcb4f 100644 --- a/arch/x86/crypto/nh-sse2-x86_64.S +++ b/arch/x86/crypto/nh-sse2-x86_64.S @@ -119,5 +119,5 @@ SYM_FUNC_START(nh_sse2) paddq PASS2_SUMS, T1 movdqu T0, 0x00(HASH) movdqu T1, 0x10(HASH) - ret + RET SYM_FUNC_END(nh_sse2) diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 7d568012cc15..58eaec958c86 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -297,7 +297,7 @@ ___ $code.=<<___; mov \$1,%eax .Lno_key: - ret + RET ___ &end_function("poly1305_init_x86_64"); @@ -373,7 +373,7 @@ $code.=<<___; .cfi_adjust_cfa_offset -48 .Lno_data: .Lblocks_epilogue: - ret + RET .cfi_endproc ___ &end_function("poly1305_blocks_x86_64"); @@ -399,7 +399,7 @@ $code.=<<___; mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET ___ &end_function("poly1305_emit_x86_64"); if ($avx) { @@ -429,7 +429,7 @@ ___ &poly1305_iteration(); $code.=<<___; pop $ctx - ret + RET .size __poly1305_block,.-__poly1305_block .type __poly1305_init_avx,\@abi-omnipotent @@ -594,7 +594,7 @@ __poly1305_init_avx: lea -48-64($ctx),$ctx # size [de-]optimization pop %rbp - ret + RET .size __poly1305_init_avx,.-__poly1305_init_avx ___ @@ -747,7 +747,7 @@ $code.=<<___; .cfi_restore %rbp .Lno_data_avx: .Lblocks_avx_epilogue: - ret + RET .cfi_endproc .align 32 @@ -1452,7 +1452,7 @@ $code.=<<___ if (!$win64); ___ $code.=<<___; vzeroupper - ret + RET .cfi_endproc ___ &end_function("poly1305_blocks_avx"); @@ -1508,7 +1508,7 @@ $code.=<<___; mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET ___ &end_function("poly1305_emit_avx"); @@ -1675,7 +1675,7 @@ $code.=<<___; .cfi_restore %rbp .Lno_data_avx2$suffix: .Lblocks_avx2_epilogue$suffix: - ret + RET .cfi_endproc .align 32 @@ -2201,7 +2201,7 @@ $code.=<<___ if (!$win64); ___ $code.=<<___; vzeroupper - ret + RET .cfi_endproc ___ if($avx > 2 && $avx512) { @@ -2792,7 +2792,7 @@ $code.=<<___ if (!$win64); .cfi_def_cfa_register %rsp ___ $code.=<<___; - ret + RET .cfi_endproc ___ @@ -2893,7 +2893,7 @@ $code.=<<___ if ($flavour =~ /elf32/); ___ $code.=<<___; mov \$1,%eax - ret + RET .size poly1305_init_base2_44,.-poly1305_init_base2_44 ___ { @@ -3010,7 +3010,7 @@ poly1305_blocks_vpmadd52: jnz .Lblocks_vpmadd52_4x .Lno_data_vpmadd52: - ret + RET .size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 ___ } @@ -3451,7 +3451,7 @@ poly1305_blocks_vpmadd52_4x: vzeroall .Lno_data_vpmadd52_4x: - ret + RET .size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x ___ } @@ -3824,7 +3824,7 @@ $code.=<<___; vzeroall .Lno_data_vpmadd52_8x: - ret + RET .size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x ___ } @@ -3861,7 +3861,7 @@ poly1305_emit_base2_44: mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET .size poly1305_emit_base2_44,.-poly1305_emit_base2_44 ___ } } } @@ -3916,7 +3916,7 @@ xor128_encrypt_n_pad: .Ldone_enc: mov $otp,%rax - ret + RET .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad .globl xor128_decrypt_n_pad @@ -3967,7 +3967,7 @@ xor128_decrypt_n_pad: .Ldone_dec: mov $otp,%rax - ret + RET .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad ___ } @@ -4109,7 +4109,7 @@ avx_handler: pop %rbx pop %rdi pop %rsi - ret + RET .size avx_handler,.-avx_handler .section .pdata diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index ba9e4c1e7f5c..c985bc15304b 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -605,7 +605,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_enc_blk8_avx) .align 8 @@ -659,7 +659,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx) write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_dec_blk8_avx) SYM_FUNC_START(serpent_ecb_enc_8way_avx) @@ -677,7 +677,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx) store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ecb_enc_8way_avx) SYM_FUNC_START(serpent_ecb_dec_8way_avx) @@ -695,7 +695,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx) store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ecb_dec_8way_avx) SYM_FUNC_START(serpent_cbc_dec_8way_avx) @@ -713,7 +713,7 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx) store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); FRAME_END - ret; + RET; SYM_FUNC_END(serpent_cbc_dec_8way_avx) SYM_FUNC_START(serpent_ctr_8way_avx) @@ -733,7 +733,7 @@ SYM_FUNC_START(serpent_ctr_8way_avx) store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ctr_8way_avx) SYM_FUNC_START(serpent_xts_enc_8way_avx) @@ -755,7 +755,7 @@ SYM_FUNC_START(serpent_xts_enc_8way_avx) store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); FRAME_END - ret; + RET; SYM_FUNC_END(serpent_xts_enc_8way_avx) SYM_FUNC_START(serpent_xts_dec_8way_avx) @@ -777,5 +777,5 @@ SYM_FUNC_START(serpent_xts_dec_8way_avx) store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); FRAME_END - ret; + RET; SYM_FUNC_END(serpent_xts_dec_8way_avx) diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index c9648aeae705..ca18948964f4 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -611,7 +611,7 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16) write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_enc_blk16) .align 8 @@ -665,7 +665,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk16) write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_dec_blk16) SYM_FUNC_START(serpent_ecb_enc_16way) @@ -687,7 +687,7 @@ SYM_FUNC_START(serpent_ecb_enc_16way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ecb_enc_16way) SYM_FUNC_START(serpent_ecb_dec_16way) @@ -709,7 +709,7 @@ SYM_FUNC_START(serpent_ecb_dec_16way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ecb_dec_16way) SYM_FUNC_START(serpent_cbc_dec_16way) @@ -732,7 +732,7 @@ SYM_FUNC_START(serpent_cbc_dec_16way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(serpent_cbc_dec_16way) SYM_FUNC_START(serpent_ctr_16way) @@ -757,7 +757,7 @@ SYM_FUNC_START(serpent_ctr_16way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(serpent_ctr_16way) SYM_FUNC_START(serpent_xts_enc_16way) @@ -783,7 +783,7 @@ SYM_FUNC_START(serpent_xts_enc_16way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(serpent_xts_enc_16way) SYM_FUNC_START(serpent_xts_dec_16way) @@ -809,5 +809,5 @@ SYM_FUNC_START(serpent_xts_dec_16way) vzeroupper; FRAME_END - ret; + RET; SYM_FUNC_END(serpent_xts_dec_16way) diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S index 6379b99cb722..8ccb03ad7cef 100644 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S @@ -553,12 +553,12 @@ SYM_FUNC_START(__serpent_enc_blk_4way) write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); - ret; + RET; .L__enc_xor4: xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); - ret; + RET; SYM_FUNC_END(__serpent_enc_blk_4way) SYM_FUNC_START(serpent_dec_blk_4way) @@ -612,5 +612,5 @@ SYM_FUNC_START(serpent_dec_blk_4way) movl arg_dst(%esp), %eax; write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); - ret; + RET; SYM_FUNC_END(serpent_dec_blk_4way) diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S index efb6dc17dc90..e0998a011d1d 100644 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S @@ -675,13 +675,13 @@ SYM_FUNC_START(__serpent_enc_blk_8way) write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); - ret; + RET; .L__enc_xor8: xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(__serpent_enc_blk_8way) SYM_FUNC_START(serpent_dec_blk_8way) @@ -735,5 +735,5 @@ SYM_FUNC_START(serpent_dec_blk_8way) write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); - ret; + RET; SYM_FUNC_END(serpent_dec_blk_8way) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 1e594d60afa5..6fa622652bfc 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -674,7 +674,7 @@ _loop3: pop %r12 pop %rbx - ret + RET SYM_FUNC_END(\name) .endm diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 11efe3a45a1f..b59f3ca62837 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -290,7 +290,7 @@ SYM_FUNC_START(sha1_ni_transform) .Ldone_hash: mov RSPSAVE, %rsp - ret + RET SYM_FUNC_END(sha1_ni_transform) .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index d25668d2a1e9..263f916362e0 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -99,7 +99,7 @@ pop %rbp pop %r12 pop %rbx - ret + RET SYM_FUNC_END(\name) .endm diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 4739cd31b9db..3baa1ec39097 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -458,7 +458,7 @@ done_hash: popq %r13 popq %r12 popq %rbx - ret + RET SYM_FUNC_END(sha256_transform_avx) .section .rodata.cst256.K256, "aM", @progbits, 256 diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 11ff60c29c8b..3439aaf4295d 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -711,7 +711,7 @@ done_hash: popq %r13 popq %r12 popq %rbx - ret + RET SYM_FUNC_END(sha256_transform_rorx) .section .rodata.cst512.K256, "aM", @progbits, 512 diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index ddfa863b4ee3..c4a5db612c32 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -472,7 +472,7 @@ done_hash: popq %r12 popq %rbx - ret + RET SYM_FUNC_END(sha256_transform_ssse3) .section .rodata.cst256.K256, "aM", @progbits, 256 diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 7abade04a3a3..94d50dd27cb5 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -326,7 +326,7 @@ SYM_FUNC_START(sha256_ni_transform) .Ldone_hash: - ret + RET SYM_FUNC_END(sha256_ni_transform) .section .rodata.cst256.K256, "aM", @progbits, 256 diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 63470fd6ae32..34fc71c3524e 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -364,7 +364,7 @@ updateblock: mov frame_RSPSAVE(%rsp), %rsp nowork: - ret + RET SYM_FUNC_END(sha512_transform_avx) ######################################################################## diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 3a44bdcfd583..399fa74ab3d3 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -681,7 +681,7 @@ done_hash: # Restore Stack Pointer mov frame_RSPSAVE(%rsp), %rsp - ret + RET SYM_FUNC_END(sha512_transform_rorx) ######################################################################## diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index 7946a1bee85b..e9b460a54503 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -366,7 +366,7 @@ updateblock: mov frame_RSPSAVE(%rsp), %rsp nowork: - ret + RET SYM_FUNC_END(sha512_transform_ssse3) ######################################################################## diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index a5151393bb2f..c3707a71ef72 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -272,7 +272,7 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8) outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); - ret; + RET; SYM_FUNC_END(__twofish_enc_blk8) .align 8 @@ -312,7 +312,7 @@ SYM_FUNC_START_LOCAL(__twofish_dec_blk8) outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); - ret; + RET; SYM_FUNC_END(__twofish_dec_blk8) SYM_FUNC_START(twofish_ecb_enc_8way) @@ -332,7 +332,7 @@ SYM_FUNC_START(twofish_ecb_enc_8way) store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); FRAME_END - ret; + RET; SYM_FUNC_END(twofish_ecb_enc_8way) SYM_FUNC_START(twofish_ecb_dec_8way) @@ -352,7 +352,7 @@ SYM_FUNC_START(twofish_ecb_dec_8way) store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); FRAME_END - ret; + RET; SYM_FUNC_END(twofish_ecb_dec_8way) SYM_FUNC_START(twofish_cbc_dec_8way) @@ -377,7 +377,7 @@ SYM_FUNC_START(twofish_cbc_dec_8way) popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(twofish_cbc_dec_8way) SYM_FUNC_START(twofish_ctr_8way) @@ -404,7 +404,7 @@ SYM_FUNC_START(twofish_ctr_8way) popq %r12; FRAME_END - ret; + RET; SYM_FUNC_END(twofish_ctr_8way) SYM_FUNC_START(twofish_xts_enc_8way) @@ -428,7 +428,7 @@ SYM_FUNC_START(twofish_xts_enc_8way) store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); FRAME_END - ret; + RET; SYM_FUNC_END(twofish_xts_enc_8way) SYM_FUNC_START(twofish_xts_dec_8way) @@ -452,5 +452,5 @@ SYM_FUNC_START(twofish_xts_dec_8way) store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); FRAME_END - ret; + RET; SYM_FUNC_END(twofish_xts_dec_8way) diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S index a6f09e4f2e46..3abcad661884 100644 --- a/arch/x86/crypto/twofish-i586-asm_32.S +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -260,7 +260,7 @@ SYM_FUNC_START(twofish_enc_blk) pop %ebx pop %ebp mov $1, %eax - ret + RET SYM_FUNC_END(twofish_enc_blk) SYM_FUNC_START(twofish_dec_blk) @@ -317,5 +317,5 @@ SYM_FUNC_START(twofish_dec_blk) pop %ebx pop %ebp mov $1, %eax - ret + RET SYM_FUNC_END(twofish_dec_blk) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index fc23552afe37..b12d916f08d6 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -258,7 +258,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way) popq %rbx; popq %r12; popq %r13; - ret; + RET; .L__enc_xor3: outunpack_enc3(xor); @@ -266,7 +266,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way) popq %rbx; popq %r12; popq %r13; - ret; + RET; SYM_FUNC_END(__twofish_enc_blk_3way) SYM_FUNC_START(twofish_dec_blk_3way) @@ -301,5 +301,5 @@ SYM_FUNC_START(twofish_dec_blk_3way) popq %rbx; popq %r12; popq %r13; - ret; + RET; SYM_FUNC_END(twofish_dec_blk_3way) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index d2e56232494a..775af290cd19 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -252,7 +252,7 @@ SYM_FUNC_START(twofish_enc_blk) popq R1 movl $1,%eax - ret + RET SYM_FUNC_END(twofish_enc_blk) SYM_FUNC_START(twofish_dec_blk) @@ -304,5 +304,5 @@ SYM_FUNC_START(twofish_dec_blk) popq R1 movl $1,%eax - ret + RET SYM_FUNC_END(twofish_dec_blk) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 08bf95dbc911..58533752efab 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -21,7 +21,7 @@ CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,) CFLAGS_syscall_x32.o += $(call cc-option,-Wno-override-init,) -obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o obj-y += vdso/ diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 4bec2597626b..8da971b5e2aa 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include /* @@ -146,27 +148,19 @@ For 32-bit we have the following conventions - kernel is built with .endm -.macro POP_REGS pop_rdi=1 skip_r11rcx=0 +.macro POP_REGS pop_rdi=1 popq %r15 popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx - .if \skip_r11rcx - popq %rsi - .else popq %r11 - .endif popq %r10 popq %r9 popq %r8 popq %rax - .if \skip_r11rcx - popq %rsi - .else popq %rcx - .endif popq %rdx popq %rsi .if \pop_rdi @@ -316,6 +310,66 @@ For 32-bit we have the following conventions - kernel is built with #endif +/* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ +.macro IBRS_ENTER save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + /* * Mitigate Spectre v1 for conditional swapgs code paths. * diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S new file mode 100644 index 000000000000..bfb7bcb362bc --- /dev/null +++ b/arch/x86/entry/entry.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common place for both 32- and 64-bit entry routines. + */ + +#include +#include +#include + +.pushsection .noinstr.text, "ax" + +SYM_FUNC_START(entry_ibpb) + movl $MSR_IA32_PRED_CMD, %ecx + movl $PRED_CMD_IBPB, %eax + xorl %edx, %edx + wrmsr + RET +SYM_FUNC_END(entry_ibpb) +/* For KVM */ +EXPORT_SYMBOL_GPL(entry_ibpb); + +.popsection diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index df8c017e6161..8fcd6a42b3a1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include @@ -782,7 +782,6 @@ SYM_CODE_START(__switch_to_asm) movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -791,7 +790,6 @@ SYM_CODE_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* Restore flags or the incoming task to restore AC state. */ popfl @@ -821,7 +819,7 @@ SYM_FUNC_START(schedule_tail_wrapper) popl %eax FRAME_END - ret + RET SYM_FUNC_END(schedule_tail_wrapper) .popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a24ce5905ab8..559c82b83475 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -93,7 +93,7 @@ SYM_CODE_END(native_usergs_sysret64) */ SYM_CODE_START(entry_SYSCALL_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY swapgs /* tss.sp2 is scratch space. */ @@ -117,6 +117,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) /* IRQs are off. */ movq %rax, %rdi movq %rsp, %rsi + + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET + call do_syscall_64 /* returns with IRQs disabled */ /* @@ -191,8 +196,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - POP_REGS pop_rdi=0 skip_r11rcx=1 + IBRS_EXIT + POP_REGS pop_rdi=0 /* * Now all regs are restored except RSP and RDI. @@ -244,7 +249,6 @@ SYM_FUNC_START(__switch_to_asm) movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -253,7 +257,6 @@ SYM_FUNC_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* restore callee-saved registers */ popq %r15 @@ -500,6 +503,7 @@ SYM_CODE_START(\asmsym) call vc_switch_off_ist movq %rax, %rsp /* Switch to new stack */ + ENCODE_FRAME_POINTER UNWIND_HINT_REGS /* Update pt_regs */ @@ -568,6 +572,7 @@ __irqentry_text_end: SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) + IBRS_EXIT #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -675,6 +680,7 @@ native_irq_return_ldt: pushq %rdi /* Stash user RDI */ swapgs /* to kernel GS */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ + UNTRAIN_RET movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ @@ -739,7 +745,7 @@ SYM_FUNC_START(asm_load_gs_index) 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE swapgs FRAME_END - ret + RET SYM_FUNC_END(asm_load_gs_index) EXPORT_SYMBOL(asm_load_gs_index) @@ -798,7 +804,7 @@ SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL) /* Restore the previous stack pointer from RBP. */ leaveq - ret + RET SYM_FUNC_END(asm_call_on_stack) #ifdef CONFIG_XEN_PV @@ -887,6 +893,9 @@ SYM_CODE_END(xen_failsafe_callback) * 1 -> no SWAPGS on exit * * Y GSBASE value at entry, must be restored in paranoid_exit + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC @@ -931,7 +940,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * is needed here. */ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx - ret + jmp .Lparanoid_gsbase_done .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ @@ -950,9 +959,17 @@ SYM_CODE_START_LOCAL(paranoid_entry) xorl %ebx, %ebx swapgs .Lparanoid_kernel_gsbase: - FENCE_SWAPGS_KERNEL_ENTRY - ret +.Lparanoid_gsbase_done: + + /* + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like + * CR3 above, keep the old value in a callee saved register. + */ + IBRS_ENTER save_reg=%r15 + UNTRAIN_RET + + RET SYM_CODE_END(paranoid_entry) /* @@ -973,9 +990,19 @@ SYM_CODE_END(paranoid_entry) * 1 -> no SWAPGS on exit * * Y User space GSBASE, must be restored unconditionally + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS + + /* + * Must restore IBRS state before both CR3 and %GS since we need access + * to the per-CPU x86_spec_ctrl_shadow variable. + */ + IBRS_EXIT save_reg=%r15 + /* * The order of operations is important. RESTORE_CR3 requires * kernel GSBASE. @@ -1022,8 +1049,11 @@ SYM_CODE_START_LOCAL(error_entry) FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ popq %r12 /* save return addr in %12 */ movq %rsp, %rdi /* arg0 = pt_regs pointer */ @@ -1031,7 +1061,7 @@ SYM_CODE_START_LOCAL(error_entry) movq %rax, %rsp /* switch stack */ ENCODE_FRAME_POINTER pushq %r12 - ret + RET /* * There are two places in the kernel that can potentially fault with @@ -1062,7 +1092,8 @@ SYM_CODE_START_LOCAL(error_entry) */ .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY - ret + ANNOTATE_UNRET_END + RET .Lbstep_iret: /* Fix truncated RIP */ @@ -1077,6 +1108,8 @@ SYM_CODE_START_LOCAL(error_entry) SWAPGS FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET /* * Pretend that the exception came from user mode: set up pt_regs @@ -1181,6 +1214,9 @@ SYM_CODE_START(asm_exc_nmi) PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER + IBRS_ENTER + UNTRAIN_RET + /* * At this point we no longer need to worry about stack damage * due to nesting -- we're on the normal thread stack and we're @@ -1403,6 +1439,9 @@ end_repeat_nmi: movq $-1, %rsi call exc_nmi + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ + IBRS_EXIT save_reg=%r15 + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0051cf5c792d..4d637a965efb 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -4,7 +4,6 @@ * * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include "calling.h" #include #include #include @@ -14,9 +13,12 @@ #include #include #include +#include #include #include +#include "calling.h" + .section .entry.text, "ax" /* @@ -47,7 +49,7 @@ * 0(%ebp) arg6 */ SYM_CODE_START(entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY /* Interrupts are off on entry. */ SWAPGS @@ -112,6 +114,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) cld + IBRS_ENTER + UNTRAIN_RET + /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -197,7 +202,7 @@ SYM_CODE_END(entry_SYSENTER_compat) * 0(%esp) arg6 */ SYM_CODE_START(entry_SYSCALL_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY /* Interrupts are off on entry. */ swapgs @@ -252,6 +257,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) UNWIND_HINT_REGS + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ @@ -266,6 +274,8 @@ sysret32_from_system_call: */ STACKLEAK_ERASE + IBRS_EXIT + movq RBX(%rsp), %rbx /* pt_regs->rbx */ movq RBP(%rsp), %rbp /* pt_regs->rbp */ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ @@ -339,7 +349,7 @@ SYM_CODE_END(entry_SYSCALL_compat) * ebp arg6 */ SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY /* * Interrupts are off on entry. */ @@ -409,6 +419,9 @@ SYM_CODE_START(entry_INT80_compat) cld + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_int80_syscall_32 jmp swapgs_restore_regs_and_return_to_usermode diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index f1f96d4d8cd6..7591bab060f7 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -24,7 +24,7 @@ SYM_CODE_START_NOALIGN(\name) popl %edx popl %ecx popl %eax - ret + RET _ASM_NOKPROBE(\name) SYM_CODE_END(\name) .endm diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index c9a9fbf1655f..1b5044ad8cd0 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -55,7 +55,7 @@ SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) popq %rsi popq %rdi popq %rbp - ret + RET _ASM_NOKPROBE(__thunk_restore) SYM_CODE_END(__thunk_restore) #endif diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index b36f42965b62..64ff8f7f360e 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -91,6 +91,7 @@ endif endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index de1fff7188aa..c92a8fef2ed2 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -6,7 +6,7 @@ #include #include #include -#include +#include .text .globl __kernel_vsyscall @@ -78,7 +78,7 @@ SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) popl %ecx CFI_RESTORE ecx CFI_ADJUST_CFA_OFFSET -4 - ret + RET CFI_ENDPROC .size __kernel_vsyscall,.-__kernel_vsyscall diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 9185cb1d13b9..5876289e48d8 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -440,7 +440,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) static __init int vdso_setup(char *s) { vdso64_enabled = simple_strtoul(s, NULL, 0); - return 0; + return 1; } __setup("vdso=", vdso_setup); diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S index 2e203f3a25a7..ef2dd1827243 100644 --- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S +++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S @@ -20,16 +20,19 @@ __vsyscall_page: mov $__NR_gettimeofday, %rax syscall ret + int3 .balign 1024, 0xcc mov $__NR_time, %rax syscall ret + int3 .balign 1024, 0xcc mov $__NR_getcpu, %rax syscall ret + int3 .balign 4096, 0xcc diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index ccc9ee1971e8..8a85658a24cc 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -312,6 +312,16 @@ static int perf_ibs_init(struct perf_event *event) hwc->config_base = perf_ibs->msr; hwc->config = config; + /* + * rip recorded by IbsOpRip will not be consistent with rsp and rbp + * recorded as part of interrupt regs. Thus we need to use rip from + * interrupt regs while unwinding call stack. Setting _EARLY flag + * makes sure we unwind call-stack before perf sample rip is set to + * IbsOpRip. + */ + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; + return 0; } @@ -692,6 +702,14 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) data.raw = &raw; } + /* + * rip recorded by IbsOpRip will not be consistent with rsp and rbp + * recorded as part of interrupt regs. Thus we need to use rip from + * interrupt regs while unwinding call stack. + */ + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + data.callchain = perf_callchain(event, iregs); + throttle = perf_event_overflow(event, &data, ®s); out: if (throttle) { @@ -764,9 +782,10 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) return ret; } -static __init void perf_event_ibs_init(void) +static __init int perf_event_ibs_init(void) { struct attribute **attr = ibs_op_format_attrs; + int ret; /* * Some chips fail to reset the fetch count when it is written; instead @@ -778,7 +797,9 @@ static __init void perf_event_ibs_init(void) if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; - perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); + ret = perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); + if (ret) + return ret; if (ibs_caps & IBS_CAPS_OPCNT) { perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; @@ -791,15 +812,35 @@ static __init void perf_event_ibs_init(void) perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; } - perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + ret = perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + if (ret) + goto err_op; + + ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); + if (ret) + goto err_nmi; - register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); + return 0; + +err_nmi: + perf_pmu_unregister(&perf_ibs_op.pmu); + free_percpu(perf_ibs_op.pcpu); + perf_ibs_op.pcpu = NULL; +err_op: + perf_pmu_unregister(&perf_ibs_fetch.pmu); + free_percpu(perf_ibs_fetch.pcpu); + perf_ibs_fetch.pcpu = NULL; + + return ret; } #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ -static __init void perf_event_ibs_init(void) { } +static __init int perf_event_ibs_init(void) +{ + return 0; +} #endif @@ -1069,9 +1110,7 @@ static __init int amd_ibs_init(void) x86_pmu_amd_ibs_starting_cpu, x86_pmu_amd_ibs_dying_cpu); - perf_event_ibs_init(); - - return 0; + return perf_event_ibs_init(); } /* Since we need the pci subsystem to init ibs we can't do this earlier: */ diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 5ba13b00e3a7..f6eadf9320a1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -254,7 +254,7 @@ static struct event_constraint intel_icl_event_constraints[] = { INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ - INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h index 1b07fb102c4e..07949102a08d 100644 --- a/arch/x86/include/asm/GEN-for-each-reg.h +++ b/arch/x86/include/asm/GEN-for-each-reg.h @@ -1,11 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * These are in machine order; things rely on that. + */ #ifdef CONFIG_64BIT GEN(rax) -GEN(rbx) GEN(rcx) GEN(rdx) +GEN(rbx) +GEN(rsp) +GEN(rbp) GEN(rsi) GEN(rdi) -GEN(rbp) GEN(r8) GEN(r9) GEN(r10) @@ -16,10 +21,11 @@ GEN(r14) GEN(r15) #else GEN(eax) -GEN(ebx) GEN(ecx) GEN(edx) +GEN(ebx) +GEN(esp) +GEN(ebp) GEN(esi) GEN(edi) -GEN(ebp) #endif diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h index 9aff97f0de7f..d937c55e717e 100644 --- a/arch/x86/include/asm/acenv.h +++ b/arch/x86/include/asm/acenv.h @@ -13,7 +13,19 @@ /* Asm macros */ -#define ACPI_FLUSH_CPU_CACHE() wbinvd() +/* + * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states. + * It is required to prevent data loss. + * + * While running inside virtual machine, the kernel can bypass cache flushing. + * Changing sleep state in a virtual machine doesn't affect the host system + * sleep state and cannot lead to data loss. + */ +#define ACPI_FLUSH_CPU_CACHE() \ +do { \ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \ + wbinvd(); \ +} while (0) int __acpi_acquire_global_lock(unsigned int *lock); int __acpi_release_global_lock(unsigned int *lock); diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h deleted file mode 100644 index 464034db299f..000000000000 --- a/arch/x86/include/asm/alternative-asm.h +++ /dev/null @@ -1,114 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_ALTERNATIVE_ASM_H -#define _ASM_X86_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -#include - -#ifdef CONFIG_SMP - .macro LOCK_PREFIX -672: lock - .pushsection .smp_locks,"a" - .balign 4 - .long 672b - . - .popsection - .endm -#else - .macro LOCK_PREFIX - .endm -#endif - -/* - * objtool annotation to ignore the alternatives and only consider the original - * instruction(s). - */ -.macro ANNOTATE_IGNORE_ALTERNATIVE - .Lannotate_\@: - .pushsection .discard.ignore_alts - .long .Lannotate_\@ - . - .popsection -.endm - -/* - * Issue one struct alt_instr descriptor entry (need to put it into - * the section .altinstructions, see below). This entry contains - * enough information for the alternatives patching code to patch an - * instruction. See apply_alternatives(). - */ -.macro altinstruction_entry orig alt feature orig_len alt_len pad_len - .long \orig - . - .long \alt - . - .word \feature - .byte \orig_len - .byte \alt_len - .byte \pad_len -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. ".skip" directive takes care of proper instruction padding - * in case @newinstr is longer than @oldinstr. - */ -.macro ALTERNATIVE oldinstr, newinstr, feature -140: - \oldinstr -141: - .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr -144: - .popsection -.endm - -#define old_len 141b-140b -#define new_len1 144f-143f -#define new_len2 145f-144f - -/* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) - - -/* - * Same as ALTERNATIVE macro above but for two alternatives. If CPU - * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has - * @feature2, it replaces @oldinstr with @feature2. - */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 -140: - \oldinstr -141: - .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ - (alt_max_short(new_len1, new_len2) - (old_len)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr1 -144: - \newinstr2 -145: - .popsection -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_ALTERNATIVE_ASM_H */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 13adca37c99a..0e777b27972b 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -2,13 +2,17 @@ #ifndef _ASM_X86_ALTERNATIVE_H #define _ASM_X86_ALTERNATIVE_H -#ifndef __ASSEMBLY__ - #include -#include #include #include +#define ALTINSTR_FLAG_INV (1 << 15) +#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) + +#ifndef __ASSEMBLY__ + +#include + /* * Alternative inline assembly for SMP. * @@ -61,7 +65,6 @@ struct alt_instr { u16 cpuid; /* cpuid bit set for replacement */ u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction */ - u8 padlen; /* length of build-time padding */ } __packed; /* @@ -72,6 +75,8 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); struct module; @@ -100,7 +105,6 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alt_end_marker "663" #define alt_slen "662b-661b" -#define alt_pad_len alt_end_marker"b-662b" #define alt_total_slen alt_end_marker"b-661b" #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" @@ -147,8 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end) " .long " b_replacement(num)"f - .\n" /* new instruction */ \ " .word " __stringify(feature) "\n" /* feature bit */ \ " .byte " alt_total_slen "\n" /* source len */ \ - " .byte " alt_rlen(num) "\n" /* replacement len */ \ - " .byte " alt_pad_len "\n" /* pad len */ + " .byte " alt_rlen(num) "\n" /* replacement len */ #define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ "# ALT: replacement " #num "\n" \ @@ -175,6 +178,11 @@ static inline int alternatives_text_reserved(void *start, void *end) ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ ".popsection\n" +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ +#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ + newinstr_yes, feature) + #define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \ OLDINSTR_3(oldinsn, 1, 2, 3) \ ".pushsection .altinstructions,\"a\"\n" \ @@ -206,15 +214,15 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") +#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \ + asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory") + /* * Alternative inline assembly with input. * * Peculiarities: * No memory clobber here. * Argument numbers start with 1. - * Best is to use constraints that are fixed size (like (%1) ... "r") - * If you use variable sized constraints like "m" or "g" in the - * replacement make sure to pad to the worst case length. * Leaving an unused argument 0 to keep API compatibility. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ @@ -271,6 +279,115 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_SMP + .macro LOCK_PREFIX +672: lock + .pushsection .smp_locks,"a" + .balign 4 + .long 672b - . + .popsection + .endm +#else + .macro LOCK_PREFIX + .endm +#endif + +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +.macro ANNOTATE_IGNORE_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.ignore_alts + .long .Lannotate_\@ - . + .popsection +.endm + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .word \feature + .byte \orig_len + .byte \alt_len +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature +140: + \oldinstr +141: + .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr +144: + .popsection +.endm + +#define old_len 141b-140b +#define new_len1 144f-143f +#define new_len2 145f-144f + +/* + * gas compatible max based on the idea from: + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + * + * The additional "-" is needed because gas uses a "true" value of -1. + */ +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +140: + \oldinstr +141: + .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f + altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr1 +144: + \newinstr2 +145: + .popsection +.endm + +/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ +#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ + ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ + newinstr_yes, feature + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 51e2bf27cc9b..8f80de627c60 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -17,20 +17,3 @@ extern void cmpxchg8b_emu(void); #endif -#ifdef CONFIG_RETPOLINE - -#define DECL_INDIRECT_THUNK(reg) \ - extern asmlinkage void __x86_indirect_thunk_ ## reg (void); - -#define DECL_RETPOLINE(reg) \ - extern asmlinkage void __x86_retpoline_ ## reg (void); - -#undef GEN -#define GEN(reg) DECL_INDIRECT_THUNK(reg) -#include - -#undef GEN -#define GEN(reg) DECL_RETPOLINE(reg) -#include - -#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 59bf91c57aa8..f4cbc01c0bc4 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -8,6 +8,7 @@ #include #include +#include enum cpuid_leafs { @@ -49,7 +50,7 @@ extern const char * const x86_power_flags[32]; extern const char * const x86_bug_flags[NBUGINTS*32]; #define test_cpu_cap(c, bit) \ - test_bit(bit, (unsigned long *)((c)->x86_capability)) + arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) /* * There are 32 bits/features in each mask word. The high bits @@ -172,39 +173,15 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" - "2:\n" - ".skip -(((5f-4f) - (2b-1b)) > 0) * " - "((5f-4f) - (2b-1b)),0x90\n" - "3:\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 4f - .\n" /* repl offset */ - " .word %P[always]\n" /* always replace */ - " .byte 3b - 1b\n" /* src len */ - " .byte 5f - 4f\n" /* repl len */ - " .byte 3b - 2b\n" /* pad len */ - ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" - "4: jmp %l[t_no]\n" - "5:\n" - ".previous\n" - ".section .altinstructions,\"a\"\n" - " .long 1b - .\n" /* src offset */ - " .long 0\n" /* no replacement */ - " .word %P[feature]\n" /* feature bit */ - " .byte 3b - 1b\n" /* src len */ - " .byte 0\n" /* repl len */ - " .byte 0\n" /* pad len */ - ".previous\n" - ".section .altinstr_aux,\"ax\"\n" - "6:\n" - " testb %[bitnum],%[cap_byte]\n" - " jnz %l[t_yes]\n" - " jmp %l[t_no]\n" - ".previous\n" + asm_volatile_goto( + ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") + ".section .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum],%[cap_byte]\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".previous\n" : : [feature] "i" (bit), - [always] "i" (X86_FEATURE_ALWAYS), [bitnum] "i" (1 << (bit & 7)), [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) : : t_yes, t_no); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3b407f46f1a0..37ba0cdf99aa 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -290,6 +290,16 @@ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +/* FREE! (11*32+ 8) */ +/* FREE! (11*32+ 9) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -308,6 +318,7 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -417,5 +428,8 @@ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 09db5b8f1444..16c24915210d 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,25 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) +#endif + /* Force disable because it's broken beyond repair */ #define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) @@ -73,7 +92,7 @@ #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_SMAP) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 4cf2ad521f65..b56c5741581a 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -6,7 +6,7 @@ * * Written by Masami Hiramatsu */ -#include +#include /* __ignore_sync_check__ */ /* * Internal bits. Don't use bitmasks directly, because these bits are diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index c1438f9239e4..594ad2f57d23 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -26,7 +26,7 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]); int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]); -bool insn_decode(struct insn *insn, struct pt_regs *regs, - unsigned char buf[MAX_INSN_SIZE], int buf_size); +bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE], int buf_size); #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index a8c3d284fa46..0da37756917a 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -8,7 +8,7 @@ */ /* insn_attr_t is defined in inat.h */ -#include +#include /* __ignore_sync_check__ */ struct insn_field { union { @@ -87,13 +87,25 @@ struct insn { #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); -extern void insn_get_prefixes(struct insn *insn); -extern void insn_get_opcode(struct insn *insn); -extern void insn_get_modrm(struct insn *insn); -extern void insn_get_sib(struct insn *insn); -extern void insn_get_displacement(struct insn *insn); -extern void insn_get_immediate(struct insn *insn); -extern void insn_get_length(struct insn *insn); +extern int insn_get_prefixes(struct insn *insn); +extern int insn_get_opcode(struct insn *insn); +extern int insn_get_modrm(struct insn *insn); +extern int insn_get_sib(struct insn *insn); +extern int insn_get_displacement(struct insn *insn); +extern int insn_get_immediate(struct insn *insn); +extern int insn_get_length(struct insn *insn); + +enum insn_mode { + INSN_MODE_32, + INSN_MODE_64, + /* Mode is determined by the current kernel build. */ + INSN_MODE_KERN, + INSN_NUM_MODES, +}; + +extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + +#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) /* Attribute will be determined after getting ModRM (for opcode groups) */ static inline void insn_get_attribute(struct insn *insn) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 6802c59e8252..8e80c2655ca9 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -191,6 +191,14 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); #define arch_kexec_pre_free_pages arch_kexec_pre_free_pages +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif typedef void crash_vmclear_fn(void); diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 365111789cc6..5000cf59bdf5 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -18,6 +18,28 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define RET jmp __x86_return_thunk +#else /* CONFIG_RETPOLINE */ +#ifdef CONFIG_SLS +#define RET ret; int3 +#else +#define RET ret +#endif +#endif /* CONFIG_RETPOLINE */ + +#else /* __ASSEMBLY__ */ + +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define ASM_RET "jmp __x86_return_thunk\n\t" +#else /* CONFIG_RETPOLINE */ +#ifdef CONFIG_SLS +#define ASM_RET "ret; int3\n\t" +#else +#define ASM_RET "ret\n\t" +#endif +#endif /* CONFIG_RETPOLINE */ + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 30f76b966857..871a8b06d430 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -247,13 +247,6 @@ bool hv_vcpu_is_preempted(int vcpu); static inline void hv_apic_init(void) {} #endif -static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, - struct msi_desc *msi_desc) -{ - msi_entry->address = msi_desc->msg.address_lo; - msi_entry->data = msi_desc->msg.data; -} - #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 972a34d93505..144dc164b759 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -91,6 +93,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass @@ -114,6 +117,41 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ +#define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -131,6 +169,7 @@ /* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 @@ -482,6 +521,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4d0f5386e637..0acd99329923 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -5,12 +5,15 @@ #include #include +#include #include -#include #include #include #include +#include + +#define RETPOLINE_THUNK_SIZE 32 /* * Fill the CPU return stack buffer. @@ -57,7 +60,9 @@ 774: \ add $(BITS_PER_LONG/8) * 2, sp; \ dec reg; \ - jnz 771b; + jnz 771b; \ + /* barrier for jnz misprediction */ \ + lfence; #ifdef __ASSEMBLY__ @@ -73,6 +78,23 @@ .popsection .endm +/* + * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions + * vs RETBleed validation. + */ +#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + +/* + * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should + * eventually turn into it's own annotation. + */ +.macro ANNOTATE_UNRET_END +#ifdef CONFIG_DEBUG_ENTRY + ANNOTATE_RETPOLINE_SAFE + nop +#endif +.endm + /* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 @@ -81,7 +103,7 @@ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ - __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else jmp *%\reg @@ -91,22 +113,61 @@ .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ - __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else call *%\reg #endif .endm +.macro ISSUE_UNBALANCED_RET_GUARD + ANNOTATE_INTRA_FUNCTION_CALL + call .Lunbalanced_ret_guard_\@ + int3 +.Lunbalanced_ret_guard_\@: + add $(BITS_PER_LONG/8), %_ASM_SP + lfence +.endm + /* * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -#ifdef CONFIG_RETPOLINE +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 +.ifb \ftr2 ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr +.else + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 +.endif __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) +.Lunbalanced_\@: + ISSUE_UNBALANCED_RET_GUARD .Lskip_rsb_\@: +.endm + +#ifdef CONFIG_CPU_UNRET_ENTRY +#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#else +#define CALL_ZEN_UNTRAIN_RET "" +#endif + +/* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * + * While zen_untrain_ret() doesn't clobber anything but requires stack, + * entry_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ +.macro UNTRAIN_RET +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) + ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm @@ -118,7 +179,21 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" +extern void __x86_return_thunk(void); +extern void zen_untrain_ret(void); +extern void entry_ibpb(void); + #ifdef CONFIG_RETPOLINE + +typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; + +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; +#include +#undef GEN + +extern retpoline_thunk_t __x86_indirect_thunk_array[]; + #ifdef CONFIG_X86_64 /* @@ -129,7 +204,7 @@ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - "call __x86_retpoline_%V[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ X86_FEATURE_RETPOLINE, \ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ @@ -181,6 +256,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_EIBRS, SPECTRE_V2_EIBRS_RETPOLINE, SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ @@ -223,6 +299,9 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); +extern void write_spec_ctrl_current(u64 val, bool force); +extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction @@ -232,18 +311,18 @@ extern u64 x86_spec_ctrl_base; */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ + X86_FEATURE_USE_IBPB_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) @@ -255,6 +334,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + #include /** @@ -304,63 +385,4 @@ static inline void mds_idle_clear_cpu_buffers(void) #endif /* __ASSEMBLY__ */ -/* - * Below is used in the eBPF JIT compiler and emits the byte sequence - * for the following assembly: - * - * With retpolines configured: - * - * callq do_rop - * spec_trap: - * pause - * lfence - * jmp spec_trap - * do_rop: - * mov %rcx,(%rsp) for x86_64 - * mov %edx,(%esp) for x86_32 - * retq - * - * Without retpolines configured: - * - * jmp *%rcx for x86_64 - * jmp *%edx for x86_32 - */ -#ifdef CONFIG_RETPOLINE -# ifdef CONFIG_X86_64 -# define RETPOLINE_RCX_BPF_JIT_SIZE 17 -# define RETPOLINE_RCX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* callq do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \ - EMIT1(0xC3); /* retq */ \ -} while (0) -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* call do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ - EMIT1(0xC3); /* ret */ \ -} while (0) -# endif -#else /* !CONFIG_RETPOLINE */ -# ifdef CONFIG_X86_64 -# define RETPOLINE_RCX_BPF_JIT_SIZE 2 -# define RETPOLINE_RCX_BPF_JIT() \ - EMIT2(0xFF, 0xE1); /* jmp *%rcx */ -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ - EMIT2(0xFF, 0xE2) /* jmp *%edx */ -# endif -#endif - #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5647bcdba776..4a32b0d34376 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -630,7 +630,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); "call " #func ";" \ PV_RESTORE_ALL_CALLER_REGS \ FRAME_END \ - "ret;" \ + ASM_RET \ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ ".popsection") diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 159622ee0674..1474cf96251d 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -48,7 +48,7 @@ asm (".pushsection .text;" "jne .slowpath;" "pop %rdx;" FRAME_END - "ret;" + ASM_RET ".slowpath: " "push %rsi;" "movzbl %al,%esi;" @@ -56,7 +56,7 @@ asm (".pushsection .text;" "pop %rsi;" "pop %rdx;" FRAME_END - "ret;" + ASM_RET ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" ".popsection"); diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 8b58d6975d5d..ea1d8eb644cb 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -11,6 +11,7 @@ #include #include +#include /* "Raw" instruction opcodes */ #define __ASM_CLAC ".byte 0x0f,0x01,0xca" @@ -18,8 +19,6 @@ #ifdef __ASSEMBLY__ -#include - #ifdef CONFIG_X86_SMAP #define ASM_CLAC \ @@ -37,8 +36,6 @@ #else /* __ASSEMBLY__ */ -#include - #ifdef CONFIG_X86_SMAP static __always_inline void clac(void) diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index cbb67b6030f9..491aadfac611 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -21,6 +21,16 @@ * relative displacement across sections. */ +/* + * The trampoline is 8 bytes and of the general form: + * + * jmp.d32 \func + * ud1 %esp, %ecx + * + * That trailing #UD provides both a speculation stop and serves as a unique + * 3 byte signature identifying static call trampolines. Also see tramp_ud[] + * and __static_call_fixup(). + */ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ @@ -34,8 +44,13 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") +#ifdef CONFIG_RETHUNK #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ - __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop") + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") +#else +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#endif #define ARCH_ADD_TRAMP_KEY(name) \ @@ -44,4 +59,6 @@ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ ".popsection \n") +extern bool __static_call_fixup(void *tramp, u8 op, void *dest); + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index fdbd9d7b7bca..3b97aa921543 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -21,7 +21,6 @@ struct saved_context { #endif unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; struct desc_ptr gdt_desc; struct desc_ptr idt; @@ -30,6 +29,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); /* routines for saving/restoring kernel state */ diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 35bb35d28733..54df06687d83 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -14,9 +14,13 @@ * Image of the saved processor state, used by the low level ACPI suspend to * RAM code and by the low level hibernation code. * - * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that - * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c, - * still work as required. + * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S + * and make sure that __save/__restore_processor_state(), defined in + * arch/x86/power/cpu.c, still work as required. + * + * Because the structure is packed, make sure to avoid unaligned members. For + * optimisation purposes but also because tools like kmemleak only search for + * pointers that are aligned. */ struct saved_context { struct pt_regs regs; @@ -36,7 +40,6 @@ struct saved_context { unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; unsigned long efer; u16 gdt_pad; /* Unused */ @@ -48,6 +51,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); #define loaddebug(thread,register) \ diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index a4a8b1b16c0c..956e4145311b 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -5,6 +5,15 @@ #include #include +static inline unsigned long random_get_entropy(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) + return random_get_entropy_fallback(); + return rdtsc(); +} +#define random_get_entropy random_get_entropy + /* Assume we use the PIT time source for the clock tick */ #define CLOCK_TICK_RATE PIT_TICK_RATE diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 01a300a9700b..fbdc3d951494 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -20,13 +20,12 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { -#ifndef CONFIG_X86_TSC - if (!boot_cpu_has(X86_FEATURE_TSC)) + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) return 0; -#endif - return rdtsc(); } +#define get_cycles get_cycles extern struct system_counterval_t convert_art_to_tsc(u64 art); extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 664d4610d700..56664b31b6da 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -8,7 +8,11 @@ #ifdef __ASSEMBLY__ .macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 + UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 +.endm + +.macro UNWIND_HINT_ENTRY + UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 .endm .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 @@ -48,17 +52,16 @@ UNWIND_HINT_REGS base=\base offset=\offset partial=1 .endm -.macro UNWIND_HINT_FUNC sp_offset=8 - UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\sp_offset type=UNWIND_HINT_TYPE_CALL +.macro UNWIND_HINT_FUNC + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm -/* - * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN - * and sibling calls. On these, sp_offset denotes the expected offset from - * initial_func_cfi. - */ -.macro UNWIND_HINT_RET_OFFSET sp_offset=8 - UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index daf88f8143c5..cf69081073b5 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -60,7 +60,7 @@ save_registers: popl saved_context_eflags movl $ret_point, saved_eip - ret + RET restore_registers: @@ -70,7 +70,7 @@ restore_registers: movl saved_context_edi, %edi pushl saved_context_eflags popfl - ret + RET SYM_CODE_START(do_suspend_lowlevel) call save_processor_state @@ -86,7 +86,7 @@ SYM_CODE_START(do_suspend_lowlevel) ret_point: call restore_registers call restore_processor_state - ret + RET SYM_CODE_END(do_suspend_lowlevel) .data diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c5f58615f448..ef107b352dd1 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -28,6 +28,7 @@ #include #include #include +#include int __read_mostly alternatives_patched; @@ -268,6 +269,8 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } } +extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __return_sites[], __return_sites_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); @@ -337,26 +340,70 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff) n_dspl, (unsigned long)orig_insn + n_dspl + repl_len); } +/* + * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90) + * + * @instr: instruction byte stream + * @instrlen: length of the above + * @off: offset within @instr where the first NOP has been detected + * + * Return: number of NOPs found (and replaced). + */ +static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) +{ + unsigned long flags; + int i = off, nnops; + + while (i < instrlen) { + if (instr[i] != 0x90) + break; + + i++; + } + + nnops = i - off; + + if (nnops <= 1) + return nnops; + + local_irq_save(flags); + add_nops(instr + off, nnops); + local_irq_restore(flags); + + DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i); + + return nnops; +} + /* * "noinline" to cause control flow change and thus invalidate I$ and * cause refetch after modification. */ -static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) +static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) { - unsigned long flags; - int i; + struct insn insn; + int i = 0; - for (i = 0; i < a->padlen; i++) { - if (instr[i] != 0x90) + /* + * Jump over the non-NOP insns and optimize single-byte NOPs into bigger + * ones. + */ + for (;;) { + if (insn_decode_kernel(&insn, &instr[i])) + return; + + /* + * See if this and any potentially following NOPs can be + * optimized. + */ + if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) + i += optimize_nops_range(instr, len, i); + else + i += insn.length; + + if (i >= len) return; } - - local_irq_save(flags); - add_nops(instr + (a->instrlen - a->padlen), a->padlen); - local_irq_restore(flags); - - DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", - instr, a->instrlen - a->padlen, a->padlen); } /* @@ -388,23 +435,29 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, */ for (a = start; a < end; a++) { int insn_buff_sz = 0; + /* Mask away "NOT" flag bit for feature to test. */ + u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV; instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; BUG_ON(a->instrlen > sizeof(insn_buff)); - BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); - if (!boot_cpu_has(a->cpuid)) { - if (a->padlen > 1) - optimize_nops(a, instr); + BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32); - continue; - } + /* + * Patch if either: + * - feature is present + * - feature not present but ALTINSTR_FLAG_INV is set to mean, + * patch if feature is *NOT* present. + */ + if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) + goto next; - DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", - a->cpuid >> 5, - a->cpuid & 0x1f, + DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)", + (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "", + feature >> 5, + feature & 0x1f, instr, instr, a->instrlen, - replacement, a->replacementlen, a->padlen); + replacement, a->replacementlen); DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); @@ -428,17 +481,262 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, if (a->replacementlen && is_jmp(replacement[0])) recompute_jump(a, instr, replacement, insn_buff); - if (a->instrlen > a->replacementlen) { - add_nops(insn_buff + a->replacementlen, - a->instrlen - a->replacementlen); - insn_buff_sz += a->instrlen - a->replacementlen; - } + for (; insn_buff_sz < a->instrlen; insn_buff_sz++) + insn_buff[insn_buff_sz] = 0x90; + DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); text_poke_early(instr, insn_buff, insn_buff_sz); + +next: + optimize_nops(instr, a->instrlen); } } +#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION) + +/* + * CALL/JMP *%\reg + */ +static int emit_indirect(int op, int reg, u8 *bytes) +{ + int i = 0; + u8 modrm; + + switch (op) { + case CALL_INSN_OPCODE: + modrm = 0x10; /* Reg = 2; CALL r/m */ + break; + + case JMP32_INSN_OPCODE: + modrm = 0x20; /* Reg = 4; JMP r/m */ + break; + + default: + WARN_ON_ONCE(1); + return -1; + } + + if (reg >= 8) { + bytes[i++] = 0x41; /* REX.B prefix */ + reg -= 8; + } + + modrm |= 0xc0; /* Mod = 3 */ + modrm += reg; + + bytes[i++] = 0xff; /* opcode */ + bytes[i++] = modrm; + + return i; +} + +/* + * Rewrite the compiler generated retpoline thunk calls. + * + * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate + * indirect instructions, avoiding the extra indirection. + * + * For example, convert: + * + * CALL __x86_indirect_thunk_\reg + * + * into: + * + * CALL *%\reg + * + * It also tries to inline spectre_v2=retpoline,amd when size permits. + */ +static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) +{ + retpoline_thunk_t *target; + int reg, ret, i = 0; + u8 op, cc; + + target = addr + insn->length + insn->immediate.value; + reg = target - __x86_indirect_thunk_array; + + if (WARN_ON_ONCE(reg & ~0xf)) + return -1; + + /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */ + BUG_ON(reg == 4); + + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && + !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) + return -1; + + op = insn->opcode.bytes[0]; + + /* + * Convert: + * + * Jcc.d32 __x86_indirect_thunk_\reg + * + * into: + * + * Jncc.d8 1f + * [ LFENCE ] + * JMP *%\reg + * [ NOP ] + * 1: + */ + /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ + if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) { + cc = insn->opcode.bytes[1] & 0xf; + cc ^= 1; /* invert condition */ + + bytes[i++] = 0x70 + cc; /* Jcc.d8 */ + bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */ + + /* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */ + op = JMP32_INSN_OPCODE; + } + + /* + * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE. + */ + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + bytes[i++] = 0x0f; + bytes[i++] = 0xae; + bytes[i++] = 0xe8; /* LFENCE */ + } + + ret = emit_indirect(op, reg, bytes + i); + if (ret < 0) + return ret; + i += ret; + + for (; i < insn->length;) + bytes[i++] = 0x90; + + return i; +} + +/* + * Generated by 'objtool --retpoline'. + */ +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; + u8 op1, op2; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op1 = insn.opcode.bytes[0]; + op2 = insn.opcode.bytes[1]; + + switch (op1) { + case CALL_INSN_OPCODE: + case JMP32_INSN_OPCODE: + break; + + case 0x0f: /* escape */ + if (op2 >= 0x80 && op2 <= 0x8f) + break; + fallthrough; + default: + WARN_ON_ONCE(1); + continue; + } + + DPRINTK("retpoline at: %pS (%px) len: %d to: %pS", + addr, addr, insn.length, + addr + insn.length + insn.immediate.value); + + len = patch_retpoline(addr, &insn, bytes); + if (len == insn.length) { + optimize_nops(bytes, len); + DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } +} + +#ifdef CONFIG_RETHUNK +/* + * Rewrite the compiler generated return thunk tail-calls. + * + * For example, convert: + * + * JMP __x86_return_thunk + * + * into: + * + * RET + */ +static int patch_return(void *addr, struct insn *insn, u8 *bytes) +{ + int i = 0; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return -1; + + bytes[i++] = RET_INSN_OPCODE; + + for (; i < insn->length;) + bytes[i++] = INT3_INSN_OPCODE; + + return i; +} + +void __init_or_module noinline apply_returns(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *dest = NULL, *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; + u8 op; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op = insn.opcode.bytes[0]; + if (op == JMP32_INSN_OPCODE) + dest = addr + insn.length + insn.immediate.value; + + if (__static_call_fixup(addr, op, dest) || + WARN_ONCE(dest != &__x86_return_thunk, + "missing return thunk: %pS-%pS: %*ph", + addr, dest, 5, addr)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", + addr, addr, insn.length, + addr + insn.length + insn.immediate.value); + + len = patch_return(addr, &insn, bytes); + if (len == insn.length) { + DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } +} +#else +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } +#endif /* CONFIG_RETHUNK */ + +#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ + +void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } + +#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ + #ifdef CONFIG_SMP static void alternatives_smp_lock(const s32 *start, const s32 *end, u8 *text, u8 *text_end) @@ -710,6 +1008,13 @@ void __init alternative_instructions(void) * patching. */ + /* + * Rewrite the retpolines, must be done before alternatives since + * those can rewrite the retpoline thunks. + */ + apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); + apply_alternatives(__alt_instructions, __alt_instructions_end); #ifdef CONFIG_SMP @@ -996,10 +1301,13 @@ void text_poke_sync(void) } struct text_poke_loc { - s32 rel_addr; /* addr := _stext + rel_addr */ - s32 rel32; + /* addr := _stext + rel_addr */ + s32 rel_addr; + s32 disp; + u8 len; u8 opcode; const u8 text[POKE_MAX_OPCODE_SIZE]; + /* see text_poke_bp_batch() */ u8 old; }; @@ -1014,7 +1322,8 @@ static struct bp_patching_desc *bp_desc; static __always_inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp) { - struct bp_patching_desc *desc = __READ_ONCE(*descp); /* rcu_dereference */ + /* rcu_dereference */ + struct bp_patching_desc *desc = __READ_ONCE(*descp); if (!desc || !arch_atomic_inc_not_zero(&desc->refs)) return NULL; @@ -1048,7 +1357,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs) { struct bp_patching_desc *desc; struct text_poke_loc *tp; - int len, ret = 0; + int ret = 0; void *ip; if (user_mode(regs)) @@ -1088,8 +1397,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs) goto out_put; } - len = text_opcode_size(tp->opcode); - ip += len; + ip += tp->len; switch (tp->opcode) { case INT3_INSN_OPCODE: @@ -1104,12 +1412,12 @@ noinstr int poke_int3_handler(struct pt_regs *regs) break; case CALL_INSN_OPCODE: - int3_emulate_call(regs, (long)ip + tp->rel32); + int3_emulate_call(regs, (long)ip + tp->disp); break; case JMP32_INSN_OPCODE: case JMP8_INSN_OPCODE: - int3_emulate_jmp(regs, (long)ip + tp->rel32); + int3_emulate_jmp(regs, (long)ip + tp->disp); break; default: @@ -1184,7 +1492,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries */ for (do_sync = 0, i = 0; i < nr_entries; i++) { u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, }; - int len = text_opcode_size(tp[i].opcode); + int len = tp[i].len; if (len - INT3_INSN_SIZE > 0) { memcpy(old + INT3_INSN_SIZE, @@ -1261,20 +1569,36 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, const void *opcode, size_t len, const void *emulate) { struct insn insn; + int ret, i; memcpy((void *)tp->text, opcode, len); if (!emulate) emulate = opcode; - kernel_insn_init(&insn, emulate, MAX_INSN_SIZE); - insn_get_length(&insn); - - BUG_ON(!insn_complete(&insn)); - BUG_ON(len != insn.length); + ret = insn_decode_kernel(&insn, emulate); + BUG_ON(ret < 0); tp->rel_addr = addr - (void *)_stext; + tp->len = len; tp->opcode = insn.opcode.bytes[0]; + switch (tp->opcode) { + case RET_INSN_OPCODE: + case JMP32_INSN_OPCODE: + case JMP8_INSN_OPCODE: + /* + * Control flow instructions without implied execution of the + * next instruction can be padded with INT3. + */ + for (i = insn.length; i < len; i++) + BUG_ON(tp->text[i] != INT3_INSN_OPCODE); + break; + + default: + BUG_ON(len != insn.length); + }; + + switch (tp->opcode) { case INT3_INSN_OPCODE: case RET_INSN_OPCODE: @@ -1283,7 +1607,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, case CALL_INSN_OPCODE: case JMP32_INSN_OPCODE: case JMP8_INSN_OPCODE: - tp->rel32 = insn.immediate.value; + tp->disp = insn.immediate.value; break; default: /* assume NOP */ @@ -1291,13 +1615,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, case 2: /* NOP2 -- emulate as JMP8+0 */ BUG_ON(memcmp(emulate, ideal_nops[len], len)); tp->opcode = JMP8_INSN_OPCODE; - tp->rel32 = 0; + tp->disp = 0; break; case 5: /* NOP5 -- emulate as JMP32+0 */ BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len)); tp->opcode = JMP32_INSN_OPCODE; - tp->rel32 = 0; + tp->disp = 0; break; default: /* unknown instruction */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 24539a05c58c..1c96f2425eaf 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -168,7 +168,7 @@ static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; notsc_setup(NULL); - return 0; + return 1; } __setup("apicpmtimer", setup_apicpmtimer); #endif diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 40f466de8924..9c283562dfd4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -199,7 +199,13 @@ static void __init uv_tsc_check_sync(void) int mmr_shift; char *state; - /* Different returns from different UV BIOS versions */ + /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */ + if (!is_uv(UV2|UV3|UV4)) { + mark_tsc_async_resets("UV5+"); + return; + } + + /* UV2,3,4, UV BIOS TSC sync state available */ mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR); mmr_shift = is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index acea05eed27d..8b9e3277a6ce 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -914,6 +914,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } +void init_spectral_chicken(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_CPU_UNRET_ENTRY + u64 value; + + /* + * On Zen2 we offer this chicken (bit) on the altar of Speculation. + * + * This suppresses speculation from the middle of a basic block, i.e. it + * suppresses non-branch predictions. + * + * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { + if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { + value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; + wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } + } +#endif +} + static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); @@ -922,12 +944,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) node_reclaim_distance = 32; #endif - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. - * Always set it, except when running under a hypervisor. - */ - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) - set_cpu_cap(c, X86_FEATURE_CPB); + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { + + /* Erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) + set_cpu_cap(c, X86_FEATURE_CPB); + + /* + * Zen3 (Fam19 model < 0x10) parts are not susceptible to + * Branch Type Confusion, but predate the allocation of the + * BTC_NO bit. + */ + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } } static void init_amd(struct cpuinfo_x86 *c) @@ -959,7 +990,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; case 0x16: init_amd_jg(c); break; - case 0x17: fallthrough; + case 0x17: init_spectral_chicken(c); + fallthrough; case 0x19: init_amd_zn(c); break; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 78b9514a3844..859a3f59526c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -38,23 +38,51 @@ static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); +static void __init retbleed_select_mitigation(void); +static void __init spectre_v2_user_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); -static void __init mds_print_mitigation(void); +static void __init md_clear_update_mitigation(void); +static void __init md_clear_select_mitigation(void); static void __init taa_select_mitigation(void); +static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +/* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* The current value of the SPEC_CTRL MSR with task-specific bits set */ +DEFINE_PER_CPU(u64, x86_spec_ctrl_current); +EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); + static DEFINE_MUTEX(spec_ctrl_mutex); /* - * The vendor and possibly platform specific bits which can be modified in - * x86_spec_ctrl_base. + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; +void write_spec_ctrl_current(u64 val, bool force) +{ + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); + + /* + * When KERNEL_IBRS this MSR is written on return-to-user, unless + * forced the update can be delayed until that time. + */ + if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + +u64 spec_ctrl_current(void) +{ + return this_cpu_read(x86_spec_ctrl_current); +} +EXPORT_SYMBOL_GPL(spec_ctrl_current); /* * AMD specific MSR info for Speculative Store Bypass control. @@ -77,6 +105,10 @@ EXPORT_SYMBOL_GPL(mds_user_clear); DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ +DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); +EXPORT_SYMBOL_GPL(mmio_stale_data_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -100,25 +132,26 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - /* Allow STIBP in MSR_SPEC_CTRL if supported */ - if (boot_cpu_has(X86_FEATURE_STIBP)) - x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; - /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); + /* + * retbleed_select_mitigation() relies on the state set by + * spectre_v2_select_mitigation(); specifically it wants to know about + * spectre_v2=ibrs. + */ + retbleed_select_mitigation(); + /* + * spectre_v2_user_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. + */ + spectre_v2_user_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); - mds_select_mitigation(); - taa_select_mitigation(); + md_clear_select_mitigation(); srbds_select_mitigation(); - /* - * As MDS and TAA mitigations are inter-related, print MDS - * mitigation until after TAA mitigation selection is done. - */ - mds_print_mitigation(); - arch_smt_update(); #ifdef CONFIG_X86_32 @@ -153,31 +186,17 @@ void __init check_bugs(void) #endif } +/* + * NOTE: For VMX, this function is not called in the vmexit path. + * It uses vmx_spec_ctrl_restore_host() instead. + */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval, hostval = x86_spec_ctrl_base; + u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); struct thread_info *ti = current_thread_info(); - /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - /* - * Restrict guest_spec_ctrl to supported values. Clear the - * modifiable bits in the host base value and or the - * modifiable bits from the guest value. - */ - guestval = hostval & ~x86_spec_ctrl_mask; - guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; - - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) - hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - - /* Conditional STIBP enabled? */ - if (static_branch_unlikely(&switch_to_cond_stibp)) - hostval |= stibp_tif_to_spec_ctrl(ti->flags); - if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -258,14 +277,6 @@ static void __init mds_select_mitigation(void) } } -static void __init mds_print_mitigation(void) -{ - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) - return; - - pr_info("%s\n", mds_strings[mds_mitigation]); -} - static int __init mds_cmdline(char *str) { if (!boot_cpu_has_bug(X86_BUG_MDS)) @@ -320,7 +331,7 @@ static void __init taa_select_mitigation(void) /* TSX previously disabled by tsx=off */ if (!boot_cpu_has(X86_FEATURE_RTM)) { taa_mitigation = TAA_MITIGATION_TSX_DISABLED; - goto out; + return; } if (cpu_mitigations_off()) { @@ -334,7 +345,7 @@ static void __init taa_select_mitigation(void) */ if (taa_mitigation == TAA_MITIGATION_OFF && mds_mitigation == MDS_MITIGATION_OFF) - goto out; + return; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) taa_mitigation = TAA_MITIGATION_VERW; @@ -366,18 +377,6 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); - - /* - * Update MDS mitigation, if necessary, as the mds_user_clear is - * now enabled for TAA mitigation. - */ - if (mds_mitigation == MDS_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MDS)) { - mds_mitigation = MDS_MITIGATION_FULL; - mds_select_mitigation(); - } -out: - pr_info("%s\n", taa_strings[taa_mitigation]); } static int __init tsx_async_abort_parse_cmdline(char *str) @@ -401,6 +400,151 @@ static int __init tsx_async_abort_parse_cmdline(char *str) } early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "MMIO Stale Data: " fmt + +enum mmio_mitigations { + MMIO_MITIGATION_OFF, + MMIO_MITIGATION_UCODE_NEEDED, + MMIO_MITIGATION_VERW, +}; + +/* Default mitigation for Processor MMIO Stale Data vulnerabilities */ +static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW; +static bool mmio_nosmt __ro_after_init = false; + +static const char * const mmio_strings[] = { + [MMIO_MITIGATION_OFF] = "Vulnerable", + [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", +}; + +static void __init mmio_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || + cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } + + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + /* + * Enable CPU buffer clear mitigation for host and VMM, if also affected + * by MDS or TAA. Otherwise, enable mitigation for VMM only. + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) + static_branch_enable(&mds_user_clear); + else + static_branch_enable(&mmio_stale_data_clear); + + /* + * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can + * be propagated to uncore buffers, clearing the Fill buffers on idle + * is required irrespective of SMT state. + */ + if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + static_branch_enable(&mds_idle_clear); + + /* + * Check if the system has the right microcode. + * + * CPU Fill buffer clear mitigation is enumerated by either an explicit + * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS + * affected systems. + */ + if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + (boot_cpu_has(X86_FEATURE_MD_CLEAR) && + boot_cpu_has(X86_FEATURE_FLUSH_L1D) && + !(ia32_cap & ARCH_CAP_MDS_NO))) + mmio_mitigation = MMIO_MITIGATION_VERW; + else + mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; + + if (mmio_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +} + +static int __init mmio_stale_data_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + mmio_mitigation = MMIO_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_nosmt = true; + } + + return 0; +} +early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); + +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + +static void __init md_clear_update_mitigation(void) +{ + if (cpu_mitigations_off()) + return; + + if (!static_key_enabled(&mds_user_clear)) + goto out; + + /* + * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data + * mitigation, if necessary. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } + if (taa_mitigation == TAA_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_select_mitigation(); + } + if (mmio_mitigation == MMIO_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_select_mitigation(); + } +out: + if (boot_cpu_has_bug(X86_BUG_MDS)) + pr_info("MDS: %s\n", mds_strings[mds_mitigation]); + if (boot_cpu_has_bug(X86_BUG_TAA)) + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); +} + +static void __init md_clear_select_mitigation(void) +{ + mds_select_mitigation(); + taa_select_mitigation(); + mmio_select_mitigation(); + + /* + * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update + * and print their mitigation after MDS, TAA and MMIO Stale Data + * mitigation selection is done. + */ + md_clear_update_mitigation(); +} + #undef pr_fmt #define pr_fmt(fmt) "SRBDS: " fmt @@ -462,11 +606,13 @@ static void __init srbds_select_mitigation(void) return; /* - * Check to see if this is one of the MDS_NO systems supporting - * TSX that are only exposed to SRBDS when TSX is enabled. + * Check to see if this is one of the MDS_NO systems supporting TSX that + * are only exposed to SRBDS when TSX is enabled or when CPU is affected + * by Processor MMIO Stale Data vulnerability. */ ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; @@ -582,12 +728,180 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +#undef pr_fmt +#define pr_fmt(fmt) "RETBleed: " fmt + +enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, + RETBLEED_MITIGATION_IBPB, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, +}; + +enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, + RETBLEED_CMD_UNRET, + RETBLEED_CMD_IBPB, +}; + +const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", + [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", +}; + +static enum retbleed_mitigation retbleed_mitigation __ro_after_init = + RETBLEED_MITIGATION_NONE; +static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + +static int __ro_after_init retbleed_nosmt = false; + +static int __init retbleed_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + while (str) { + char *next = strchr(str, ','); + if (next) { + *next = 0; + next++; + } + + if (!strcmp(str, "off")) { + retbleed_cmd = RETBLEED_CMD_OFF; + } else if (!strcmp(str, "auto")) { + retbleed_cmd = RETBLEED_CMD_AUTO; + } else if (!strcmp(str, "unret")) { + retbleed_cmd = RETBLEED_CMD_UNRET; + } else if (!strcmp(str, "ibpb")) { + retbleed_cmd = RETBLEED_CMD_IBPB; + } else if (!strcmp(str, "nosmt")) { + retbleed_nosmt = true; + } else { + pr_err("Ignoring unknown retbleed option (%s).", str); + } + + str = next; + } + + return 0; +} +early_param("retbleed", retbleed_parse_cmdline); + +#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + +static void __init retbleed_select_mitigation(void) +{ + bool mitigate_smt = false; + + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + + switch (retbleed_cmd) { + case RETBLEED_CMD_OFF: + return; + + case RETBLEED_CMD_UNRET: + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + } else { + pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); + goto do_cmd_auto; + } + break; + + case RETBLEED_CMD_IBPB: + if (!boot_cpu_has(X86_FEATURE_IBPB)) { + pr_err("WARNING: CPU does not support IBPB.\n"); + goto do_cmd_auto; + } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto do_cmd_auto; + } + break; + +do_cmd_auto: + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } + + /* + * The Intel mitigation (IBRS or eIBRS) was already selected in + * spectre_v2_select_mitigation(). 'retbleed_mitigation' will + * be set accordingly below. + */ + + break; + } + + switch (retbleed_mitigation) { + case RETBLEED_MITIGATION_UNRET: + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); + + mitigate_smt = true; + break; + + case RETBLEED_MITIGATION_IBPB: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + mitigate_smt = true; + break; + + default: + break; + } + + if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && + (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + switch (spectre_v2_enabled) { + case SPECTRE_V2_IBRS: + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; + break; + case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_EIBRS_LFENCE: + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; + break; + default: + pr_err(RETBLEED_INTEL_MSG); + } + } + + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); +} + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = SPECTRE_V2_USER_NONE; static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = @@ -617,6 +931,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; } #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" #define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n" #ifdef CONFIG_BPF_SYSCALL void unpriv_ebpf_notify(int new_state) @@ -658,6 +973,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_EIBRS, SPECTRE_V2_CMD_EIBRS_RETPOLINE, SPECTRE_V2_CMD_EIBRS_LFENCE, + SPECTRE_V2_CMD_IBRS, }; enum spectre_v2_user_cmd { @@ -698,13 +1014,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) pr_info("spectre_v2_user=%s forced on command line.\n", reason); } +static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; + static enum spectre_v2_user_cmd __init -spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_parse_user_cmdline(void) { char arg[20]; int ret, i; - switch (v2_cmd) { + switch (spectre_v2_cmd) { case SPECTRE_V2_CMD_NONE: return SPECTRE_V2_USER_CMD_NONE; case SPECTRE_V2_CMD_FORCE: @@ -730,15 +1048,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) { - return (mode == SPECTRE_V2_EIBRS || - mode == SPECTRE_V2_EIBRS_RETPOLINE || - mode == SPECTRE_V2_EIBRS_LFENCE); + return mode == SPECTRE_V2_IBRS || + mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; } static void __init -spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_user_select_mitigation(void) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); @@ -751,7 +1070,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - cmd = spectre_v2_parse_user_cmdline(v2_cmd); + cmd = spectre_v2_parse_user_cmdline(); switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; @@ -799,12 +1118,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } /* - * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not - * required. + * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, + * STIBP is not required. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return; /* @@ -816,6 +1135,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (mode != SPECTRE_V2_USER_STRICT && + mode != SPECTRE_V2_USER_STRICT_PREFERRED) + pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n"); + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + } + spectre_v2_user_stibp = mode; set_mode: @@ -829,6 +1155,7 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; static const struct { @@ -846,6 +1173,7 @@ static const struct { { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "ibrs", SPECTRE_V2_CMD_IBRS, false }, }; static void __init spec_v2_print_cond(const char *reason, bool secure) @@ -908,6 +1236,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } + if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { + pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { + pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; @@ -923,6 +1275,69 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +/* Disable in-kernel use of non-RSB RET predictors */ +static void __init spec_ctrl_disable_kernel_rrsba(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + if (ia32_cap & ARCH_CAP_RRSBA) { + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } +} + +static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) +{ + /* + * Similar to context switches, there are two types of RSB attacks + * after VM exit: + * + * 1) RSB underflow + * + * 2) Poisoned RSB entry + * + * When retpoline is enabled, both are mitigated by filling/clearing + * the RSB. + * + * When IBRS is enabled, while #1 would be mitigated by the IBRS branch + * prediction isolation protections, RSB still needs to be cleared + * because of #2. Note that SMEP provides no protection here, unlike + * user-space-poisoned RSB entries. + * + * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB + * bug is present then a LITE version of RSB protection is required, + * just a single call needs to retire before a RET is executed. + */ + switch (mode) { + case SPECTRE_V2_NONE: + return; + + case SPECTRE_V2_EIBRS_LFENCE: + case SPECTRE_V2_EIBRS: + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); + pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); + } + return; + + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); + return; + } + + pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); + dump_stack(); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -947,6 +1362,15 @@ static void __init spectre_v2_select_mitigation(void) break; } + if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && + boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + mode = SPECTRE_V2_IBRS; + break; + } + mode = spectre_v2_select_retpoline(); break; @@ -963,6 +1387,10 @@ static void __init spectre_v2_select_mitigation(void) mode = spectre_v2_select_retpoline(); break; + case SPECTRE_V2_CMD_IBRS: + mode = SPECTRE_V2_IBRS; + break; + case SPECTRE_V2_CMD_EIBRS: mode = SPECTRE_V2_EIBRS; break; @@ -979,10 +1407,9 @@ static void __init spectre_v2_select_mitigation(void) if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); - if (spectre_v2_in_eibrs_mode(mode)) { - /* Force it so VMEXIT will restore correctly */ + if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } switch (mode) { @@ -990,6 +1417,12 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_EIBRS: break; + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) + pr_warn(SPECTRE_V2_IBRS_PERF_MSG); + break; + case SPECTRE_V2_LFENCE: case SPECTRE_V2_EIBRS_LFENCE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); @@ -1001,43 +1434,96 @@ static void __init spectre_v2_select_mitigation(void) break; } + /* + * Disable alternate RSB predictions in kernel when indirect CALLs and + * JMPs gets protection against BHI and Intramode-BTI, but RET + * prediction from a non-RSB predictor is still a risk. + */ + if (mode == SPECTRE_V2_EIBRS_LFENCE || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_RETPOLINE) + spec_ctrl_disable_kernel_rrsba(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); /* - * If spectre v2 protection has been enabled, unconditionally fill - * RSB during a context switch; this protects against two independent - * issues: + * If Spectre v2 protection has been enabled, fill the RSB during a + * context switch. In general there are two types of RSB attacks + * across context switches, for which the CALLs/RETs may be unbalanced. * - * - RSB underflow (and switch to BTB) on Skylake+ - * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs + * 1) RSB underflow + * + * Some Intel parts have "bottomless RSB". When the RSB is empty, + * speculated return targets may come from the branch predictor, + * which could have a user-poisoned BTB or BHB entry. + * + * AMD has it even worse: *all* returns are speculated from the BTB, + * regardless of the state of the RSB. + * + * When IBRS or eIBRS is enabled, the "user -> kernel" attack + * scenario is mitigated by the IBRS branch prediction isolation + * properties, so the RSB buffer filling wouldn't be necessary to + * protect against this type of attack. + * + * The "user -> user" attack scenario is mitigated by RSB filling. + * + * 2) Poisoned RSB entry + * + * If the 'next' in-kernel return stack is shorter than 'prev', + * 'next' could be tricked into speculating with a user-poisoned RSB + * entry. + * + * The "user -> kernel" attack scenario is mitigated by SMEP and + * eIBRS. + * + * The "user -> user" scenario, also known as SpectreBHB, requires + * RSB clearing. + * + * So to mitigate all cases, unconditionally fill RSB on context + * switches. + * + * FIXME: Is this pointless for retbleed-affected AMD? */ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + spectre_v2_determine_rsb_fill_type_at_vmexit(mode); + /* - * Retpoline means the kernel is safe because it has no indirect - * branches. Enhanced IBRS protects firmware too, so, enable restricted - * speculation around firmware calls only when Enhanced IBRS isn't - * supported. + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise + * enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { + if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + boot_cpu_has(X86_FEATURE_IBPB) && + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) { + + if (retbleed_cmd != RETBLEED_CMD_IBPB) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW); + pr_info("Enabling Speculation Barrier for firmware calls\n"); + } + + } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } /* Set up IBPB and STIBP depending on the general spectre V2 command */ - spectre_v2_user_select_mitigation(cmd); + spectre_v2_cmd = cmd; } static void update_stibp_msr(void * __unused) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); + write_spec_ctrl_current(val, true); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1072,6 +1558,8 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { + u64 ia32_cap = x86_read_arch_cap_msr(); + /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1083,14 +1571,17 @@ static void update_mds_branch_idle(void) if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) return; - if (sched_smt_active()) + if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); - else + } else if (mmio_mitigation == MMIO_MITIGATION_OFF || + (ia32_cap & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); + } } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" void cpu_bugs_smt_update(void) { @@ -1135,6 +1626,16 @@ void cpu_bugs_smt_update(void) break; } + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1238,16 +1739,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) break; } - /* - * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper - * bit in the mask to allow guests to use the mitigation even in the - * case where the host does not enable it. - */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; - } - /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. @@ -1265,7 +1756,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } } @@ -1483,7 +1974,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); @@ -1704,9 +2195,23 @@ static ssize_t tsx_async_abort_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t mmio_stale_data_show_state(char *buf) +{ + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mmio_strings[mmio_mitigation]); + } + + return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { @@ -1736,6 +2241,19 @@ static char *ibpb_state(void) return ""; } +static char *pbrsb_eibrs_state(void) +{ + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || + boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) + return ", PBRSB-eIBRS: SW sequence"; + else + return ", PBRSB-eIBRS: Vulnerable"; + } else { + return ", PBRSB-eIBRS: Not affected"; + } +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -1748,12 +2266,13 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sprintf(buf, "%s%s%s%s%s%s\n", + return sprintf(buf, "%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + pbrsb_eibrs_state(), spectre_v2_module_string()); } @@ -1762,6 +2281,24 @@ static ssize_t srbds_show_state(char *buf) return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); } +static ssize_t retbleed_show_state(char *buf) +{ + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + + return sprintf(buf, "%s; SMT %s\n", + retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? + "enabled with STIBP protection" : "vulnerable"); + } + + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -1804,6 +2341,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRBDS: return srbds_show_state(buf); + case X86_BUG_MMIO_STALE_DATA: + return mmio_stale_data_show_state(buf); + + case X86_BUG_RETBLEED: + return retbleed_show_state(buf); + default: break; } @@ -1855,4 +2398,14 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * { return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); } + +ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); +} + +ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9c8fc6f513ed..9fc91482e85e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1024,6 +1024,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) #define NO_SPECTRE_V2 BIT(8) +#define NO_EIBRS_PBRSB BIT(9) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1064,7 +1065,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1074,7 +1075,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ - VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), @@ -1092,24 +1095,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { {} }; +#define VULNBL(vendor, family, model, blacklist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ INTEL_FAM6_##model, steppings, \ X86_FEATURE_ANY, issues) +#define VULNBL_AMD(family, blacklist) \ + VULNBL(AMD, family, X86_MODEL_ANY, blacklist) + +#define VULNBL_HYGON(family, blacklist) \ + VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) + #define SRBDS BIT(0) +/* CPU is affected by X86_BUG_MMIO_STALE_DATA */ +#define MMIO BIT(1) +/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ +#define MMIO_SBDS BIT(2) +/* CPU is affected by RETbleed, speculating where you would not expect it */ +#define RETBLEED BIT(3) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), + VULNBL_AMD(0x17, RETBLEED), + VULNBL_HYGON(0x18, RETBLEED), {} }; @@ -1130,6 +1169,13 @@ u64 x86_read_arch_cap_msr(void) return ia32_cap; } +static bool arch_cap_mmio_immune(u64 ia32_cap) +{ + return (ia32_cap & ARCH_CAP_FBSDP_NO && + ia32_cap & ARCH_CAP_PSDP_NO && + ia32_cap & ARCH_CAP_SBDR_SSDP_NO); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1183,12 +1229,37 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) /* * SRBDS affects CPUs which support RDRAND or RDSEED and are listed * in the vulnerability blacklist. + * + * Some of the implications and mitigation of Shared Buffers Data + * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as + * SRBDS. */ if ((cpu_has(c, X86_FEATURE_RDRAND) || cpu_has(c, X86_FEATURE_RDSEED)) && - cpu_matches(cpu_vuln_blacklist, SRBDS)) + cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS)) setup_force_cpu_bug(X86_BUG_SRBDS); + /* + * Processor MMIO Stale Data bug enumeration + * + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. + */ + if (cpu_matches(cpu_vuln_blacklist, MMIO) && + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + } + + if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && + !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 093f5fc860e3..91df90abc1d9 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -60,6 +60,8 @@ extern void tsx_disable(void); static inline void tsx_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ +extern void init_spectral_chicken(struct cpuinfo_x86 *c); + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index b78c471ec344..774ca6bfda9f 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -318,6 +318,12 @@ static void init_hygon(struct cpuinfo_x86 *c) /* get apicid instead of initial apic id from cpuid */ c->apicid = hard_smp_processor_id(); + /* + * XXX someone from Hygon needs to confirm this DTRT + * + init_spectral_chicken(c); + */ + set_cpu_cap(c, X86_FEATURE_ZEN); set_cpu_cap(c, X86_FEATURE_CPB); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 816fdbec795a..c6ad53e38f65 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -88,7 +88,7 @@ static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) { ring3mwait_disabled = true; - return 0; + return 1; } __setup("ring3mwait=disable", ring3mwait_disable); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index f73f1184b1c1..09f7c652346a 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1457,10 +1457,23 @@ static void threshold_remove_bank(struct threshold_bank *bank) kfree(bank); } +static void __threshold_remove_device(struct threshold_bank **bp) +{ + unsigned int bank, numbanks = this_cpu_read(mce_num_banks); + + for (bank = 0; bank < numbanks; bank++) { + if (!bp[bank]) + continue; + + threshold_remove_bank(bp[bank]); + bp[bank] = NULL; + } + kfree(bp); +} + int mce_threshold_remove_device(unsigned int cpu) { struct threshold_bank **bp = this_cpu_read(threshold_banks); - unsigned int bank, numbanks = this_cpu_read(mce_num_banks); if (!bp) return 0; @@ -1471,13 +1484,7 @@ int mce_threshold_remove_device(unsigned int cpu) */ this_cpu_write(threshold_banks, NULL); - for (bank = 0; bank < numbanks; bank++) { - if (bp[bank]) { - threshold_remove_bank(bp[bank]); - bp[bank] = NULL; - } - } - kfree(bp); + __threshold_remove_device(bp); return 0; } @@ -1514,15 +1521,14 @@ int mce_threshold_create_device(unsigned int cpu) if (!(this_cpu_read(bank_map) & (1 << bank))) continue; err = threshold_create_bank(bp, cpu, bank); - if (err) - goto out_err; + if (err) { + __threshold_remove_device(bp); + return err; + } } this_cpu_write(threshold_banks, bp); if (thresholding_irq_en) mce_threshold_vector = amd_threshold_interrupt; return 0; -out_err: - mce_threshold_remove_device(cpu); - return err; } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 65d11711cd7b..021cd067733e 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -84,7 +84,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); - add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR); ack_APIC_irq(); set_irq_regs(old_regs); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 866c9a9bcdee..82fe492121bb 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,7 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 7edbd5ee5ed4..dca5cf82144c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -308,7 +308,7 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE 1 +#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) @@ -367,7 +367,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* The trampoline ends with ret(q) */ retq = (unsigned long)ftrace_stub; - ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + memcpy(ip, text_gen_insn(JMP32_INSN_OPCODE, ip, &__x86_return_thunk), JMP32_INSN_SIZE); + else + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index e405fe1a8bf4..a0ed0e4a2c0c 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -19,7 +19,7 @@ #endif SYM_FUNC_START(__fentry__) - ret + RET SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) @@ -84,7 +84,7 @@ ftrace_graph_call: /* This is weak to keep gas from relaxing the jumps */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) - ret + RET SYM_CODE_END(ftrace_caller) SYM_CODE_START(ftrace_regs_caller) @@ -177,7 +177,7 @@ SYM_CODE_START(ftrace_graph_caller) popl %edx popl %ecx popl %eax - ret + RET SYM_CODE_END(ftrace_graph_caller) .globl return_to_handler diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index ac3d5f22fe64..e3a375185a1b 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -132,7 +132,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE SYM_FUNC_START(__fentry__) - retq + RET SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) @@ -170,10 +170,11 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) /* * This is weak to keep gas from relaxing the jumps. - * It is also used to copy the retq for trampolines. + * It is also used to copy the RET for trampolines. */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) - retq + UNWIND_HINT_FUNC + RET SYM_FUNC_END(ftrace_epilogue) SYM_FUNC_START(ftrace_regs_caller) @@ -265,7 +266,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) restore_mcount_regs 8 /* Restore flags */ popfq - UNWIND_HINT_RET_OFFSET + UNWIND_HINT_FUNC jmp ftrace_epilogue SYM_FUNC_END(ftrace_regs_caller) @@ -287,7 +288,7 @@ fgraph_trace: #endif SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) - retq + RET trace: /* save_mcount_regs fills in first two parameters */ @@ -319,7 +320,7 @@ SYM_FUNC_START(ftrace_graph_caller) restore_mcount_regs - retq + RET SYM_FUNC_END(ftrace_graph_caller) SYM_CODE_START(return_to_handler) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 05e117137b45..efe13ab366f4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -419,6 +419,8 @@ static void __init clear_bss(void) { memset(__bss_start, 0, (unsigned long) __bss_stop - (unsigned long) __bss_start); + memset(__brk_base, 0, + (unsigned long) __brk_limit - (unsigned long) __brk_base); } static unsigned long get_cmd_line_ptr(void) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 7ed84c282233..3f1691b89231 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -354,7 +355,7 @@ setup_once: #endif andl $0,setup_once_ref /* Once is enough, thanks */ - ret + RET SYM_FUNC_START(early_idt_handler_array) # 36(%esp) %eflags diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3c417734790f..0424c2a6c15b 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -321,6 +321,8 @@ SYM_CODE_END(start_cpu0) SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -378,6 +380,7 @@ SYM_CODE_START(early_idt_handler_array) SYM_CODE_END(early_idt_handler_array) SYM_CODE_START_LOCAL(early_idt_handler_common) + ANNOTATE_UNRET_END /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -424,6 +427,8 @@ SYM_CODE_END(early_idt_handler_common) SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index 7dfb1e808928..bd218470d145 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -88,6 +88,8 @@ const char * const *arch_get_ima_policy(void) if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) { if (IS_ENABLED(CONFIG_MODULE_SIG)) set_module_sig_enforced(); + if (IS_ENABLED(CONFIG_KEXEC_SIG)) + set_kexec_sig_enforced(); return sb_arch_rules; } return NULL; diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S index 0db0375235b4..a9c36400bdf8 100644 --- a/arch/x86/kernel/irqflags.S +++ b/arch/x86/kernel/irqflags.S @@ -10,7 +10,7 @@ SYM_FUNC_START(native_save_fl) pushf pop %_ASM_AX - ret + RET SYM_FUNC_END(native_save_fl) EXPORT_SYMBOL(native_save_fl) @@ -21,6 +21,6 @@ EXPORT_SYMBOL(native_save_fl) SYM_FUNC_START(native_restore_fl) push %_ASM_ARG1 popf - ret + RET SYM_FUNC_END(native_restore_fl) EXPORT_SYMBOL(native_restore_fl) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 535da74c124e..ee85f1b258d0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -768,7 +768,7 @@ asm( RESTORE_REGS_STRING " popfl\n" #endif - " ret\n" + ASM_RET ".size kretprobe_trampoline, .-kretprobe_trampoline\n" ); NOKPROBE_SYMBOL(kretprobe_trampoline); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6c3d38b5a8ad..fe9babe94861 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -188,7 +188,7 @@ void kvm_async_pf_task_wake(u32 token) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; - struct kvm_task_sleep_node *n; + struct kvm_task_sleep_node *n, *dummy = NULL; if (token == ~0) { apf_task_wake_all(); @@ -200,28 +200,41 @@ void kvm_async_pf_task_wake(u32 token) n = _find_apf_task(b, token); if (!n) { /* - * async PF was not yet handled. - * Add dummy entry for the token. + * Async #PF not yet handled, add a dummy entry for the token. + * Allocating the token must be down outside of the raw lock + * as the allocator is preemptible on PREEMPT_RT kernels. */ - n = kzalloc(sizeof(*n), GFP_ATOMIC); - if (!n) { - /* - * Allocation failed! Busy wait while other cpu - * handles async PF. - */ + if (!dummy) { raw_spin_unlock(&b->lock); - cpu_relax(); + dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC); + + /* + * Continue looping on allocation failure, eventually + * the async #PF will be handled and allocating a new + * node will be unnecessary. + */ + if (!dummy) + cpu_relax(); + + /* + * Recheck for async #PF completion before enqueueing + * the dummy token to avoid duplicate list entries. + */ goto again; } - n->token = token; - n->cpu = smp_processor_id(); - init_swait_queue_head(&n->wq); - hlist_add_head(&n->link, &b->list); + dummy->token = token; + dummy->cpu = smp_processor_id(); + init_swait_queue_head(&dummy->wq); + hlist_add_head(&dummy->link, &b->list); + dummy = NULL; } else { apf_task_wake_one(n); } raw_spin_unlock(&b->lock); - return; + + /* A dummy token might be allocated and ultimately not used. */ + if (dummy) + kfree(dummy); } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); @@ -940,7 +953,7 @@ asm( "movq __per_cpu_offset(,%rdi,8), %rax;" "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" "setne %al;" -"ret;" +ASM_RET ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" ".popsection"); diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index c98c220d39a9..eeb6992f3307 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -255,7 +255,8 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, - *para = NULL, *orc = NULL, *orc_ip = NULL; + *para = NULL, *orc = NULL, *orc_ip = NULL, + *retpolines = NULL, *returns = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,8 +272,20 @@ int module_finalize(const Elf_Ehdr *hdr, orc = s; if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) orc_ip = s; + if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) + retpolines = s; + if (!strcmp(".return_sites", secstrings + s->sh_name)) + returns = s; } + if (retpolines) { + void *rseg = (void *)retpolines->sh_addr; + apply_retpolines(rseg, rseg + retpolines->sh_size); + } + if (returns) { + void *rseg = (void *)returns->sh_addr; + apply_returns(rseg, rseg + returns->sh_size); + } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5e5fcf5c376d..e21937680d1f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -40,7 +40,7 @@ extern void _paravirt_nop(void); asm (".pushsection .entry.text, \"ax\"\n" ".global _paravirt_nop\n" "_paravirt_nop:\n\t" - "ret\n\t" + ASM_RET ".size _paravirt_nop, . - _paravirt_nop\n\t" ".type _paravirt_nop, @function\n\t" ".popsection"); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0aa1baf9a3af..a2823682d64e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -556,7 +556,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + write_spec_ctrl_current(msr, false); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 94b33885f8d2..82a839885a3c 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -7,10 +7,12 @@ #include #include #include +#include #include /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 2) @@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) movl %edi, %eax addl $(identity_mapped - relocate_kernel), %eax pushl %eax + ANNOTATE_UNRET_SAFE ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %edx, %edx xorl %esi, %esi xorl %ebp, %ebp + ANNOTATE_UNRET_SAFE ret + int3 1: popl %edx movl CP_PA_SWAP_PAGE(%edi), %esp addl $PAGE_SIZE, %esp 2: + ANNOTATE_RETPOLINE_SAFE call *%edx /* get the re-entry point of the peer system */ @@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movl %edi, %eax addl $(virtual_mapped - relocate_kernel), %eax pushl %eax + ANNOTATE_UNRET_SAFE ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popl %edi popl %esi popl %ebx + ANNOTATE_UNRET_SAFE ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) popl %edi popl %ebx popl %ebp + ANNOTATE_UNRET_SAFE ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index a4d9a261425b..d2b7d212caa4 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -13,7 +13,8 @@ #include /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 3) @@ -104,7 +105,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* jump to identity mapped page */ addq $(identity_mapped - relocate_kernel), %r8 pushq %r8 + ANNOTATE_UNRET_SAFE ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -191,7 +194,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %r14d, %r14d xorl %r15d, %r15d + ANNOTATE_UNRET_SAFE ret + int3 1: popq %rdx @@ -210,7 +215,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) call swap_pages movq $virtual_mapped, %rax pushq %rax + ANNOTATE_UNRET_SAFE ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -231,7 +238,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popq %r12 popq %rbp popq %rbx + ANNOTATE_UNRET_SAFE ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -288,7 +297,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) lea PAGE_SIZE(%rax), %rsi jmp 0b 3: + ANNOTATE_UNRET_SAFE ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index c222fab112cb..f441002c2327 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -236,7 +236,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) return ES_EXCEPTION; } - if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res)) + if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res)) return ES_DECODE_FAILED; } else { res = vc_fetch_insn_kernel(ctxt, buffer); diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S index ee04941a6546..3355e27c69eb 100644 --- a/arch/x86/kernel/sev_verify_cbit.S +++ b/arch/x86/kernel/sev_verify_cbit.S @@ -85,5 +85,5 @@ SYM_FUNC_START(sev_verify_cbit) #endif /* Return page-table pointer */ movq %rdi, %rax - ret + RET SYM_FUNC_END(sev_verify_cbit) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index ca9a380d9c0b..2973b3fb0ec1 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -11,7 +11,17 @@ enum insn_type { RET = 3, /* tramp / site cond-tail-call */ }; -static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) +/* + * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such + * that there is no false-positive trampoline identification while also being a + * speculation stop. + */ +static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; + +static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; + +static void __ref __static_call_transform(void *insn, enum insn_type type, + void *func, bool modinit) { int size = CALL_INSN_SIZE; const void *code; @@ -30,15 +40,17 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void break; case RET: - code = text_gen_insn(RET_INSN_OPCODE, insn, func); - size = RET_INSN_SIZE; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + else + code = &retinsn; break; } if (memcmp(insn, code, size) == 0) return; - if (unlikely(system_state == SYSTEM_BOOTING)) + if (system_state == SYSTEM_BOOTING || modinit) return text_poke_early(insn, code, size); text_poke_bp(insn, code, size, NULL); @@ -85,14 +97,42 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) if (tramp) { __static_call_validate(tramp, true); - __static_call_transform(tramp, __sc_insn(!func, true), func); + __static_call_transform(tramp, __sc_insn(!func, true), func, false); } if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { __static_call_validate(site, tail); - __static_call_transform(site, __sc_insn(!func, tail), func); + __static_call_transform(site, __sc_insn(!func, tail), func, false); } mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); + +#ifdef CONFIG_RETHUNK +/* + * This is called by apply_returns() to fix up static call trampolines, + * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as + * having a return trampoline. + * + * The problem is that static_call() is available before determining + * X86_FEATURE_RETHUNK and, by implication, running alternatives. + * + * This means that __static_call_transform() above can have overwritten the + * return trampoline and we now need to fix things up to be consistent. + */ +bool __static_call_fixup(void *tramp, u8 op, void *dest) +{ + if (memcmp(tramp+5, tramp_ud, 3)) { + /* Not a trampoline site, not our problem. */ + return false; + } + + mutex_lock(&text_mutex); + if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) + __static_call_transform(tramp, RET, NULL, true); + mutex_unlock(&text_mutex); + + return true; +} +#endif diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 60d2c3798ba2..2f97d1a1032f 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -175,8 +175,7 @@ void set_task_blockstep(struct task_struct *task, bool on) * * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if * task is current or it can't be running, otherwise we can race - * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but - * PTRACE_KILL is not safe. + * with __switch_to_xtra(). We rely on ptrace_freeze_traced(). */ local_irq_disable(); debugctl = get_debugctlmsr(); diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 504fa5425bce..3fd1c81eb5e3 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -68,9 +68,6 @@ static int __init control_va_addr_alignment(char *str) if (*str == 0) return 1; - if (*str == '=') - str++; - if (!strcmp(str, "32")) va_align.flags = ALIGN_VA_32; else if (!strcmp(str, "64")) @@ -80,11 +77,11 @@ static int __init control_va_addr_alignment(char *str) else if (!strcmp(str, "on")) va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; else - return 0; + pr_warn("invalid option value: 'align_va_addr=%s'\n", str); return 1; } -__setup("align_va_addr", control_va_addr_alignment); +__setup("align_va_addr=", control_va_addr_alignment); SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index f6225bf22c02..8032f5f7eef9 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -356,7 +356,7 @@ bool fixup_umip_exception(struct pt_regs *regs) if (!nr_copied) return false; - if (!insn_decode(&insn, regs, buf, nr_copied)) + if (!insn_decode_from_regs(&insn, regs, buf, nr_copied)) return false; umip_inst = identify_insn(&insn); diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 641f0fe1e5b4..1258a5872d12 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -132,9 +132,9 @@ SYM_FUNC_START_LOCAL(verify_cpu) .Lverify_cpu_no_longmode: popf # Restore caller passed flags movl $1,%eax - ret + RET .Lverify_cpu_sse_ok: popf # Restore caller passed flags xorl %eax, %eax - ret + RET SYM_FUNC_END(verify_cpu) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9fef0742de1d..b36f5f42a754 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -142,7 +142,7 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.indirect_thunk) + *(.text.__x86.*) __indirect_thunk_end = .; #endif } :text =0xcccc @@ -272,6 +272,27 @@ SECTIONS __parainstructions_end = .; } +#ifdef CONFIG_RETPOLINE + /* + * List of instructions that call/jmp/jcc to retpoline thunks + * __x86_indirect_thunk_*(). These instructions can be patched along + * with alternatives, after which the section can be freed. + */ + . = ALIGN(8); + .retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) { + __retpoline_sites = .; + *(.retpoline_sites) + __retpoline_sites_end = .; + } + + . = ALIGN(8); + .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { + __return_sites = .; + *(.return_sites) + __return_sites_end = .; + } +#endif + /* * struct alt_inst entries. From the header (alternative.h): * "Alternative instructions for different CPU types or capabilities" diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 71e1a2d39f21..737035f16a9e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -188,9 +188,6 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) -#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE 8 - struct opcode { u64 flags : 56; u64 intercept : 8; @@ -304,9 +301,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for * different operand sizes can be reached by calculation, rather than a jump * table (which would be bigger than the code). + * + * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR + * and 1 for the straight line speculation INT3, leaves 7 bytes for the + * body of the function. Currently none is larger than 4. */ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); +#define FASTOP_SIZE 16 + #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ @@ -316,19 +319,21 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); __FOP_FUNC(#name) #define __FOP_RET(name) \ - "ret \n\t" \ + ASM_RET \ ".size " name ", .-" name "\n\t" #define FOP_RET(name) \ __FOP_RET(#name) -#define FOP_START(op) \ +#define __FOP_START(op, align) \ extern void em_##op(struct fastop *fake); \ asm(".pushsection .text, \"ax\" \n\t" \ ".global em_" #op " \n\t" \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ + ".align " __stringify(align) " \n\t" \ "em_" #op ":\n\t" +#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) + #define FOP_END \ ".popsection") @@ -428,19 +433,29 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); FOP_END /* Special case for SETcc - 1 instruction per cc */ + +/* + * Depending on .config the SETcc functions look like: + * + * SETcc %al [3 bytes] + * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] + * INT3 [1 byte; CONFIG_SLS] + */ +#define SETCC_ALIGN 16 + #define FOP_SETCC(op) \ - ".align 4 \n\t" \ + ".align " __stringify(SETCC_ALIGN) " \n\t" \ ".type " #op ", @function \n\t" \ #op ": \n\t" \ #op " %al \n\t" \ - __FOP_RET(#op) + __FOP_RET(#op) \ + ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" asm(".pushsection .fixup, \"ax\"\n" - ".global kvm_fastop_exception \n" - "kvm_fastop_exception: xor %esi, %esi; ret\n" + "kvm_fastop_exception: xor %esi, %esi; " ASM_RET ".popsection"); -FOP_START(setcc) +__FOP_START(setcc, SETCC_ALIGN) FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) @@ -1055,7 +1070,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; - void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); + void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; asm("push %[flags]; popf; " CALL_NOSPEC diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a3ef793fce5f..6ed6b090be94 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -297,6 +297,10 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } + + /* Check if there are APF page ready requests pending */ + if (enabled) + kvm_make_request(KVM_REQ_APF_READY, apic->vcpu); } static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) @@ -2260,6 +2264,8 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) if (value & MSR_IA32_APICBASE_ENABLE) { kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); static_key_slow_dec_deferred(&apic_hw_disabled); + /* Check if there are APF page ready requests pending */ + kvm_make_request(KVM_REQ_APF_READY, vcpu); } else { static_key_slow_inc(&apic_hw_disabled.key); atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index bcf1bd9424f2..6096d0f1a62a 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5377,6 +5377,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; int nr_zapped, batch = 0; + bool unstable; restart: list_for_each_entry_safe_reverse(sp, node, @@ -5408,11 +5409,12 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) goto restart; } - if (__kvm_mmu_prepare_zap_page(kvm, sp, - &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { - batch += nr_zapped; + unstable = __kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped); + batch += nr_zapped; + + if (unstable) goto restart; - } } /* diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 23910e6a3f01..e7feaa7910ab 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1198,8 +1198,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; ret = -ENOMEM; - ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); - save = kzalloc(sizeof(*save), GFP_KERNEL); + ctl = kzalloc(sizeof(*ctl), GFP_KERNEL_ACCOUNT); + save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT); if (!ctl || !save) goto out_free; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6c82ef22985d..7397cc449e2f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -537,7 +537,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) } ret = -ENOMEM; - blob = kmalloc(params.len, GFP_KERNEL); + blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); if (!blob) goto e_free; @@ -676,7 +676,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, if (!IS_ALIGNED(dst_paddr, 16) || !IS_ALIGNED(paddr, 16) || !IS_ALIGNED(size, 16)) { - tpage = (void *)alloc_page(GFP_KERNEL); + tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO); if (!tpage) return -ENOMEM; diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 1ec1ac40e328..c18d812d00cd 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -128,6 +128,15 @@ SYM_FUNC_START(__svm_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize 'ret' if the return is + * from the kernel. + */ + UNTRAIN_RET + /* * Clear all general purpose registers except RSP and RAX to prevent * speculative use of the guest's values, even those that are reloaded @@ -166,5 +175,5 @@ SYM_FUNC_START(__svm_vcpu_run) pop %edi #endif pop %_ASM_BP - ret + RET SYM_FUNC_END(__svm_vcpu_run) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c2389d0fdaf..09804cad6e2d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -12,6 +12,7 @@ #include "nested.h" #include "pmu.h" #include "trace.h" +#include "vmx.h" #include "x86.h" static bool __read_mostly enable_shadow_vmcs = 1; @@ -3075,35 +3076,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->host_state.cr4 = cr4; } - asm( - "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ - "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" - "je 1f \n\t" - __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t" - "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" - "1: \n\t" - "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ - - /* Check if vmlaunch or vmresume is needed */ - "cmpb $0, %c[launched](%[loaded_vmcs])\n\t" - - /* - * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set - * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail - * Valid. vmx_vmenter() directly "returns" RFLAGS, and so the - * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail. - */ - "call vmx_vmenter\n\t" - - CC_SET(be) - : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail) - : [HOST_RSP]"r"((unsigned long)HOST_RSP), - [loaded_vmcs]"r"(vmx->loaded_vmcs), - [launched]"i"(offsetof(struct loaded_vmcs, launched)), - [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), - [wordsize]"i"(sizeof(ulong)) - : "memory" - ); + vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, + __vmx_vcpu_run_flags(vmx)); if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); @@ -3668,12 +3642,34 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) + struct vmcs12 *vmcs12, + u32 vm_exit_reason, u32 exit_intr_info) { u32 idt_vectoring; unsigned int nr; - if (vcpu->arch.exception.injected) { + /* + * Per the SDM, VM-Exits due to double and triple faults are never + * considered to occur during event delivery, even if the double/triple + * fault is the result of an escalating vectoring issue. + * + * Note, the SDM qualifies the double fault behavior with "The original + * event results in a double-fault exception". It's unclear why the + * qualification exists since exits due to double fault can occur only + * while vectoring a different exception (injected events are never + * subject to interception), i.e. there's _always_ an original event. + * + * The SDM also uses NMI as a confusing example for the "original event + * causes the VM exit directly" clause. NMI isn't special in any way, + * the same rule applies to all events that cause an exit directly. + * NMI is an odd choice for the example because NMIs can only occur on + * instruction boundaries, i.e. they _can't_ occur during vectoring. + */ + if ((u16)vm_exit_reason == EXIT_REASON_TRIPLE_FAULT || + ((u16)vm_exit_reason == EXIT_REASON_EXCEPTION_NMI && + is_double_fault(exit_intr_info))) { + vmcs12->idt_vectoring_info_field = 0; + } else if (vcpu->arch.exception.injected) { nr = vcpu->arch.exception.nr; idt_vectoring = nr | VECTORING_INFO_VALID_MASK; @@ -3706,6 +3702,8 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, idt_vectoring |= INTR_TYPE_EXT_INTR; vmcs12->idt_vectoring_info_field = idt_vectoring; + } else { + vmcs12->idt_vectoring_info_field = 0; } } @@ -4143,12 +4141,12 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, /* update exit information fields: */ vmcs12->vm_exit_reason = vm_exit_reason; vmcs12->exit_qualification = exit_qualification; - vmcs12->vm_exit_intr_info = exit_intr_info; - - vmcs12->idt_vectoring_info_field = 0; - vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + /* + * On VM-Exit due to a failed VM-Entry, the VMCS isn't marked launched + * and only EXIT_REASON and EXIT_QUALIFICATION are updated, all other + * exit info fields are unmodified. + */ if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { vmcs12->launch_state = 1; @@ -4160,7 +4158,12 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * Transfer the event that L0 or L1 may wanted to inject into * L2 to IDT_VECTORING_INFO_FIELD. */ - vmcs12_save_pending_event(vcpu, vmcs12); + vmcs12_save_pending_event(vcpu, vmcs12, + vm_exit_reason, exit_intr_info); + + vmcs12->vm_exit_intr_info = exit_intr_info; + vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); /* * According to spec, there's no need to store the guest's diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h new file mode 100644 index 000000000000..edc3f16cc189 --- /dev/null +++ b/arch/x86/kvm/vmx/run_flags.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_RUN_FLAGS_H +#define __KVM_X86_VMX_RUN_FLAGS_H + +#define VMX_RUN_VMRESUME (1 << 0) +#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) + +#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 571d9ad80a59..69c147df957f 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -102,6 +102,11 @@ static inline bool is_breakpoint(u32 intr_info) return is_exception_n(intr_info, BP_VECTOR); } +static inline bool is_double_fault(u32 intr_info) +{ + return is_exception_n(intr_info, DF_VECTOR); +} + static inline bool is_page_fault(u32 intr_info) { return is_exception_n(intr_info, PF_VECTOR); diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 90ad7a6246e3..982138bebb70 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -5,6 +5,7 @@ #include #include #include +#include "run_flags.h" #define WORD_SIZE (BITS_PER_LONG / 8) @@ -30,73 +31,12 @@ .section .noinstr.text, "ax" -/** - * vmx_vmenter - VM-Enter the current loaded VMCS - * - * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME - * - * Returns: - * %RFLAGS.CF is set on VM-Fail Invalid - * %RFLAGS.ZF is set on VM-Fail Valid - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * Note that VMRESUME/VMLAUNCH fall-through and return directly if - * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump - * to vmx_vmexit. - */ -SYM_FUNC_START(vmx_vmenter) - /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ - je 2f - -1: vmresume - ret - -2: vmlaunch - ret - -3: cmpb $0, kvm_rebooting - je 4f - ret -4: ud2 - - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) - -SYM_FUNC_END(vmx_vmenter) - -/** - * vmx_vmexit - Handle a VMX VM-Exit - * - * Returns: - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump - * here after hardware loads the host's state, i.e. this is the destination - * referred to by VMCS.HOST_RIP. - */ -SYM_FUNC_START(vmx_vmexit) -#ifdef CONFIG_RETPOLINE - ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE - /* Preserve guest's RAX, it's used to stuff the RSB. */ - push %_ASM_AX - - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE - - /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ - or $1, %_ASM_AX - - pop %_ASM_AX -.Lvmexit_skip_rsb: -#endif - ret -SYM_FUNC_END(vmx_vmexit) - /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode - * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @vmx: struct vcpu_vmx * * @regs: unsigned long * (to guest registers) - * @launched: %true if the VMCS has been launched + * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl * * Returns: * 0 on VM-Exit, 1 on VM-Fail @@ -115,24 +55,29 @@ SYM_FUNC_START(__vmx_vcpu_run) #endif push %_ASM_BX + /* Save @vmx for SPEC_CTRL handling */ + push %_ASM_ARG1 + + /* Save @flags for SPEC_CTRL handling */ + push %_ASM_ARG3 + /* * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and * @regs is needed after VM-Exit to save the guest's register values. */ push %_ASM_ARG2 - /* Copy @launched to BL, _ASM_ARG3 is volatile. */ + /* Copy @flags to BL, _ASM_ARG3 is volatile. */ mov %_ASM_ARG3B, %bl - /* Adjust RSP to account for the CALL to vmx_vmenter(). */ - lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 + lea (%_ASM_SP), %_ASM_ARG2 call vmx_update_host_rsp /* Load @regs to RAX. */ mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - cmpb $0, %bl + testb $VMX_RUN_VMRESUME, %bl /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -154,11 +99,36 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Enter guest mode */ - call vmx_vmenter + /* Check EFLAGS.ZF from 'testb' above */ + jz .Lvmlaunch - /* Jump on VM-Fail. */ - jbe 2f + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" + * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. + * So this isn't a typical function and objtool needs to be told to + * save the unwind state here and restore it below. + */ + UNWIND_HINT_SAVE + +/* + * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at + * the 'vmx_vmexit' label below. + */ +.Lvmresume: + vmresume + jmp .Lvmfail + +.Lvmlaunch: + vmlaunch + jmp .Lvmfail + + _ASM_EXTABLE(.Lvmresume, .Lfixup) + _ASM_EXTABLE(.Lvmlaunch, .Lfixup) + +SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + + /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ + UNWIND_HINT_RESTORE /* Temporarily save guest's RAX. */ push %_ASM_AX @@ -185,21 +155,23 @@ SYM_FUNC_START(__vmx_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif - /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ - xor %eax, %eax + /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ + xor %ebx, %ebx +.Lclear_regs: /* - * Clear all general purpose registers except RSP and RAX to prevent + * Clear all general purpose registers except RSP and RBX to prevent * speculative use of the guest's values, even those that are reloaded * via the stack. In theory, an L1 cache miss when restoring registers * could lead to speculative execution with the guest's values. * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially * free. RSP and RAX are exempt as RSP is restored by hardware during - * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return + * value. */ -1: xor %ecx, %ecx + xor %eax, %eax + xor %ecx, %ecx xor %edx, %edx - xor %ebx, %ebx xor %ebp, %ebp xor %esi, %esi xor %edi, %edi @@ -216,8 +188,32 @@ SYM_FUNC_START(__vmx_vcpu_run) /* "POP" @regs. */ add $WORD_SIZE, %_ASM_SP - pop %_ASM_BX + /* + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before + * the first unbalanced RET after vmexit! + * + * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB + * entries and (in some cases) RSB underflow. + * + * eIBRS has its own protection against poisoned RSB, so it doesn't + * need the RSB filling sequence. But it does need to be enabled, and a + * single call to retire, before the first unbalanced RET. + */ + + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\ + X86_FEATURE_RSB_VMEXIT_LITE + + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ + + call vmx_spec_ctrl_restore_host + + /* Put return value in AX */ + mov %_ASM_BX, %_ASM_AX + + pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 pop %r13 @@ -228,11 +224,17 @@ SYM_FUNC_START(__vmx_vcpu_run) pop %edi #endif pop %_ASM_BP - ret + RET + +.Lfixup: + cmpb $0, kvm_rebooting + jne .Lvmfail + ud2 +.Lvmfail: + /* VM-Fail: set return value to 1 */ + mov $1, %_ASM_BX + jmp .Lclear_regs - /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ -2: mov $1, %eax - jmp 1b SYM_FUNC_END(__vmx_vcpu_run) @@ -293,7 +295,7 @@ SYM_FUNC_START(vmread_error_trampoline) pop %_ASM_AX pop %_ASM_BP - ret + RET SYM_FUNC_END(vmread_error_trampoline) SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff) @@ -326,5 +328,5 @@ SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff) */ mov %_ASM_BP, %_ASM_SP pop %_ASM_BP - ret + RET SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 94f5f2129e3b..9b520da3f748 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -226,6 +226,9 @@ static const struct { #define L1D_CACHE_ORDER 4 static void *vmx_l1d_flush_pages; +/* Control for disabling CPU Fill buffer clear */ +static bool __read_mostly vmx_fb_clear_ctrl_available; + static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; @@ -357,6 +360,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } +static void vmx_setup_fb_clear_ctrl(void) +{ + u64 msr; + + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && + !boot_cpu_has_bug(X86_BUG_MDS) && + !boot_cpu_has_bug(X86_BUG_TAA)) { + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_FB_CLEAR_CTRL) + vmx_fb_clear_ctrl_available = true; + } +} + +static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) +{ + u64 msr; + + if (!vmx->disable_fb_clear) + return; + + msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); + msr |= FB_CLEAR_DIS; + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; +} + +static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) +{ + if (!vmx->disable_fb_clear) + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); +} + +static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +{ + vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + + /* + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS + * at VMEntry. Skip the MSR read/write when a guest has no use case to + * execute VERW. + */ + if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) || + ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO))) + vmx->disable_fb_clear = false; +} + static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, @@ -562,7 +619,7 @@ static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) * evmcs in singe VM shares same assist page. */ if (!*p_hv_pa_pg) - *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); + *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); if (!*p_hv_pa_pg) return -ENOMEM; @@ -879,6 +936,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) return true; } +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) +{ + unsigned int flags = 0; + + if (vmx->loaded_vmcs->launched) + flags |= VMX_RUN_VMRESUME; + + /* + * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free + * to change it directly without causing a vmexit. In that case read + * it after vmexit and store it in vmx->spec_ctrl. + */ + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) + flags |= VMX_RUN_SAVE_SPEC_CTRL; + + return flags; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -2211,6 +2286,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ret = kvm_set_msr_common(vcpu, msr_info); } + /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ + if (msr_index == MSR_IA32_ARCH_CAPABILITIES) + vmx_update_fb_clear_dis(vcpu, vmx); + return ret; } @@ -4483,6 +4562,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vpid_sync_context(vmx->vpid); if (init_event) vmx_clear_hlt(vcpu); + + vmx_update_fb_clear_dis(vcpu, vmx); } static void enable_irq_window(struct kvm_vcpu *vcpu) @@ -6612,6 +6693,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } +void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, + unsigned int flags) +{ + u64 hostval = this_cpu_read(x86_spec_ctrl_current); + + if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) + return; + + if (flags & VMX_RUN_SAVE_SPEC_CTRL) + vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); + + /* + * If the guest/host SPEC_CTRL values differ, restore the host value. + * + * For legacy IBRS, the IBRS bit always needs to be written after + * transitioning from a less privileged predictor mode, regardless of + * whether the guest/host values differ. + */ + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || + vmx->spec_ctrl != hostval) + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); + + barrier_nospec(); +} + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { switch (to_vmx(vcpu)->exit_reason.basic) { @@ -6624,10 +6730,9 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) } } -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); - static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, - struct vcpu_vmx *vmx) + struct vcpu_vmx *vmx, + unsigned long flags) { /* * VMENTER enables interrupts (host state), but the kernel state is @@ -6654,15 +6759,22 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&mds_user_clear)) mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); + + vmx_disable_fb_clear(vmx); if (vcpu->arch.cr2 != native_read_cr2()) native_write_cr2(vcpu->arch.cr2); vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + flags); vcpu->arch.cr2 = native_read_cr2(); + vmx_enable_fb_clear(vmx); + /* * VMEXIT disables interrupts (host state), but tracing and lockdep * have them in state 'on' as recorded before entering guest mode. @@ -6756,27 +6868,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); /* The actual VMENTER/EXIT is in the .noinstr.text section. */ - vmx_vcpu_enter_exit(vcpu, vmx); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); /* All fields are clean at this point */ if (static_branch_unlikely(&enable_evmcs)) @@ -8047,6 +8139,8 @@ static int __init vmx_init(void) return r; } + vmx_setup_fb_clear_ctrl(); + for_each_possible_cpu(cpu) { INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 5ff24537393e..a6b52d3a39c9 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -13,6 +13,7 @@ #include "vmcs.h" #include "vmx_ops.h" #include "cpuid.h" +#include "run_flags.h" extern const u32 vmx_msr_index[]; @@ -300,6 +301,8 @@ struct vcpu_vmx { u64 msr_ia32_feature_control; u64 msr_ia32_feature_control_valid_bits; u64 ept_pointer; + u64 msr_ia32_mcu_opt_ctrl; + bool disable_fb_clear; struct pt_desc pt_desc; @@ -363,6 +366,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1a7c3405773e..29a8ca95c581 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1415,6 +1415,9 @@ static u64 kvm_get_arch_capabilities(void) */ } + /* Guests don't need to know "Fill buffer clear control" exists */ + data &= ~ARCH_CAP_FB_CLEAR_CTRL; + return data; } @@ -8139,15 +8142,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, */ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) { - struct kvm_lapic_irq lapic_irq; + /* + * All other fields are unused for APIC_DM_REMRD, but may be consumed by + * common code, e.g. for tracing. Defer initialization to the compiler. + */ + struct kvm_lapic_irq lapic_irq = { + .delivery_mode = APIC_DM_REMRD, + .dest_mode = APIC_DEST_PHYSICAL, + .shorthand = APIC_DEST_NOSHORT, + .dest_id = apicid, + }; - lapic_irq.shorthand = APIC_DEST_NOSHORT; - lapic_irq.dest_mode = APIC_DEST_PHYSICAL; - lapic_irq.level = 0; - lapic_irq.dest_id = apicid; - lapic_irq.msi_redir_hint = false; - - lapic_irq.delivery_mode = APIC_DM_REMRD; kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } @@ -11153,7 +11158,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) if (!kvm_pv_async_pf_enabled(vcpu)) return true; else - return apf_pageready_slot_free(vcpu); + return kvm_lapic_enabled(vcpu) && apf_pageready_slot_free(vcpu); } void kvm_arch_start_assignment(struct kvm *kvm) @@ -11168,9 +11173,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); -bool kvm_arch_has_assigned_device(struct kvm *kvm) +bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) { - return atomic_read(&kvm->arch.assigned_device_count); + return arch_atomic_read(&kvm->arch.assigned_device_count); } EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 3b6544111ac9..e768815e58ae 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -6,84 +6,86 @@ */ #include -#include +#include /* if you want SMP support, implement these with real spinlocks */ -.macro LOCK reg +.macro IRQ_SAVE reg pushfl cli .endm -.macro UNLOCK reg +.macro IRQ_RESTORE reg popfl .endm -#define BEGIN(op) \ +#define BEGIN_IRQ_SAVE(op) \ .macro endp; \ SYM_FUNC_END(atomic64_##op##_386); \ .purgem endp; \ .endm; \ SYM_FUNC_START(atomic64_##op##_386); \ - LOCK v; + IRQ_SAVE v; #define ENDP endp -#define RET \ - UNLOCK v; \ - ret - -#define RET_ENDP \ - RET; \ - ENDP +#define RET_IRQ_RESTORE \ + IRQ_RESTORE v; \ + RET #define v %ecx -BEGIN(read) +BEGIN_IRQ_SAVE(read) movl (v), %eax movl 4(v), %edx -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(set) +BEGIN_IRQ_SAVE(set) movl %ebx, (v) movl %ecx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(xchg) +BEGIN_IRQ_SAVE(xchg) movl (v), %eax movl 4(v), %edx movl %ebx, (v) movl %ecx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %ecx -BEGIN(add) +BEGIN_IRQ_SAVE(add) addl %eax, (v) adcl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %ecx -BEGIN(add_return) +BEGIN_IRQ_SAVE(add_return) addl (v), %eax adcl 4(v), %edx movl %eax, (v) movl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %ecx -BEGIN(sub) +BEGIN_IRQ_SAVE(sub) subl %eax, (v) sbbl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %ecx -BEGIN(sub_return) +BEGIN_IRQ_SAVE(sub_return) negl %edx negl %eax sbbl $0, %edx @@ -91,47 +93,52 @@ BEGIN(sub_return) adcl 4(v), %edx movl %eax, (v) movl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(inc) +BEGIN_IRQ_SAVE(inc) addl $1, (v) adcl $0, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(inc_return) +BEGIN_IRQ_SAVE(inc_return) movl (v), %eax movl 4(v), %edx addl $1, %eax adcl $0, %edx movl %eax, (v) movl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(dec) +BEGIN_IRQ_SAVE(dec) subl $1, (v) sbbl $0, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(dec_return) +BEGIN_IRQ_SAVE(dec_return) movl (v), %eax movl 4(v), %edx subl $1, %eax sbbl $0, %edx movl %eax, (v) movl %edx, 4(v) -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v #define v %esi -BEGIN(add_unless) +BEGIN_IRQ_SAVE(add_unless) addl %eax, %ecx adcl %edx, %edi addl (v), %eax @@ -143,7 +150,7 @@ BEGIN(add_unless) movl %edx, 4(v) movl $1, %eax 2: - RET + RET_IRQ_RESTORE 3: cmpl %edx, %edi jne 1b @@ -153,7 +160,7 @@ ENDP #undef v #define v %esi -BEGIN(inc_not_zero) +BEGIN_IRQ_SAVE(inc_not_zero) movl (v), %eax movl 4(v), %edx testl %eax, %eax @@ -165,7 +172,7 @@ BEGIN(inc_not_zero) movl %edx, 4(v) movl $1, %eax 2: - RET + RET_IRQ_RESTORE 3: testl %edx, %edx jne 1b @@ -174,7 +181,7 @@ ENDP #undef v #define v %esi -BEGIN(dec_if_positive) +BEGIN_IRQ_SAVE(dec_if_positive) movl (v), %eax movl 4(v), %edx subl $1, %eax @@ -183,5 +190,6 @@ BEGIN(dec_if_positive) movl %eax, (v) movl %edx, 4(v) 1: -RET_ENDP + RET_IRQ_RESTORE +ENDP #undef v diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 1c5c81c16b06..90afb488b396 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -6,7 +6,7 @@ */ #include -#include +#include .macro read64 reg movl %ebx, %eax @@ -18,7 +18,7 @@ SYM_FUNC_START(atomic64_read_cx8) read64 %ecx - ret + RET SYM_FUNC_END(atomic64_read_cx8) SYM_FUNC_START(atomic64_set_cx8) @@ -28,7 +28,7 @@ SYM_FUNC_START(atomic64_set_cx8) cmpxchg8b (%esi) jne 1b - ret + RET SYM_FUNC_END(atomic64_set_cx8) SYM_FUNC_START(atomic64_xchg_cx8) @@ -37,7 +37,7 @@ SYM_FUNC_START(atomic64_xchg_cx8) cmpxchg8b (%esi) jne 1b - ret + RET SYM_FUNC_END(atomic64_xchg_cx8) .macro addsub_return func ins insc @@ -68,7 +68,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8) popl %esi popl %ebx popl %ebp - ret + RET SYM_FUNC_END(atomic64_\func\()_return_cx8) .endm @@ -93,7 +93,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8) movl %ebx, %eax movl %ecx, %edx popl %ebx - ret + RET SYM_FUNC_END(atomic64_\func\()_return_cx8) .endm @@ -118,7 +118,7 @@ SYM_FUNC_START(atomic64_dec_if_positive_cx8) movl %ebx, %eax movl %ecx, %edx popl %ebx - ret + RET SYM_FUNC_END(atomic64_dec_if_positive_cx8) SYM_FUNC_START(atomic64_add_unless_cx8) @@ -149,7 +149,7 @@ SYM_FUNC_START(atomic64_add_unless_cx8) addl $8, %esp popl %ebx popl %ebp - ret + RET 4: cmpl %edx, 4(%esp) jne 2b @@ -176,5 +176,5 @@ SYM_FUNC_START(atomic64_inc_not_zero_cx8) movl $1, %eax 3: popl %ebx - ret + RET SYM_FUNC_END(atomic64_inc_not_zero_cx8) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4304320e51f4..929ad1747dea 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -127,7 +127,7 @@ SYM_FUNC_START(csum_partial) 8: popl %ebx popl %esi - ret + RET SYM_FUNC_END(csum_partial) #else @@ -245,7 +245,7 @@ SYM_FUNC_START(csum_partial) 90: popl %ebx popl %esi - ret + RET SYM_FUNC_END(csum_partial) #endif @@ -371,7 +371,7 @@ EXC( movb %cl, (%edi) ) popl %esi popl %edi popl %ecx # equivalent to addl $4,%esp - ret + RET SYM_FUNC_END(csum_partial_copy_generic) #else @@ -447,7 +447,7 @@ EXC( movb %dl, (%edi) ) popl %esi popl %edi popl %ebx - ret + RET SYM_FUNC_END(csum_partial_copy_generic) #undef ROUND diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index c4c7dd115953..fe59b8ac4fcc 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -17,7 +17,7 @@ SYM_FUNC_START(clear_page_rep) movl $4096/8,%ecx xorl %eax,%eax rep stosq - ret + RET SYM_FUNC_END(clear_page_rep) EXPORT_SYMBOL_GPL(clear_page_rep) @@ -39,7 +39,7 @@ SYM_FUNC_START(clear_page_orig) leaq 64(%rdi),%rdi jnz .Lloop nop - ret + RET SYM_FUNC_END(clear_page_orig) EXPORT_SYMBOL_GPL(clear_page_orig) @@ -47,6 +47,6 @@ SYM_FUNC_START(clear_page_erms) movl $4096,%ecx xorl %eax,%eax rep stosb - ret + RET SYM_FUNC_END(clear_page_erms) EXPORT_SYMBOL_GPL(clear_page_erms) diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 3542502faa3b..33c70c0160ea 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -37,11 +37,11 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) popfq mov $1, %al - ret + RET .Lnot_same: popfq xor %al,%al - ret + RET SYM_FUNC_END(this_cpu_cmpxchg16b_emu) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index ca01ed6029f4..6a912d58fecc 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -32,7 +32,7 @@ SYM_FUNC_START(cmpxchg8b_emu) movl %ecx, 4(%esi) popfl - ret + RET .Lnot_same: movl (%esi), %eax @@ -40,7 +40,7 @@ SYM_FUNC_START(cmpxchg8b_emu) movl 4(%esi), %edx popfl - ret + RET SYM_FUNC_END(cmpxchg8b_emu) EXPORT_SYMBOL(cmpxchg8b_emu) diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S index 892d8915f609..fdd1929b1f9d 100644 --- a/arch/x86/lib/copy_mc_64.S +++ b/arch/x86/lib/copy_mc_64.S @@ -86,7 +86,7 @@ SYM_FUNC_START(copy_mc_fragile) .L_done_memcpy_trap: xorl %eax, %eax .L_done: - ret + RET SYM_FUNC_END(copy_mc_fragile) EXPORT_SYMBOL_GPL(copy_mc_fragile) @@ -142,7 +142,7 @@ SYM_FUNC_START(copy_mc_enhanced_fast_string) rep movsb /* Copy successful. Return zero */ xorl %eax, %eax - ret + RET SYM_FUNC_END(copy_mc_enhanced_fast_string) .section .fixup, "ax" @@ -155,7 +155,7 @@ SYM_FUNC_END(copy_mc_enhanced_fast_string) * user-copy routines. */ movq %rcx, %rax - ret + RET .previous diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 2402d4c489d2..30ea644bf446 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -3,7 +3,7 @@ #include #include -#include +#include #include /* @@ -17,7 +17,7 @@ SYM_FUNC_START(copy_page) ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD movl $4096/8, %ecx rep movsq - ret + RET SYM_FUNC_END(copy_page) EXPORT_SYMBOL(copy_page) @@ -85,5 +85,5 @@ SYM_FUNC_START_LOCAL(copy_page_regs) movq (%rsp), %rbx movq 1*8(%rsp), %r12 addq $2*8, %rsp - ret + RET SYM_FUNC_END(copy_page_regs) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 77b9b2a3b5c8..84cee84fc658 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -105,7 +105,7 @@ SYM_FUNC_START(copy_user_generic_unrolled) jnz 21b 23: xor %eax,%eax ASM_CLAC - ret + RET .section .fixup,"ax" 30: shll $6,%ecx @@ -173,7 +173,7 @@ SYM_FUNC_START(copy_user_generic_string) movsb xorl %eax,%eax ASM_CLAC - ret + RET .section .fixup,"ax" 11: leal (%rdx,%rcx,8),%ecx @@ -207,7 +207,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string) movsb xorl %eax,%eax ASM_CLAC - ret + RET .section .fixup,"ax" 12: movl %ecx,%edx /* ecx is zerorest also */ @@ -239,7 +239,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) 1: rep movsb 2: mov %ecx,%eax ASM_CLAC - ret + RET /* * Return zero to pretend that this copy succeeded. This @@ -250,7 +250,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) */ 3: xorl %eax,%eax ASM_CLAC - ret + RET _ASM_EXTABLE_CPY(1b, 2b) SYM_CODE_END(.Lcopy_user_handle_tail) @@ -361,7 +361,7 @@ SYM_FUNC_START(__copy_user_nocache) xorl %eax,%eax ASM_CLAC sfence - ret + RET .section .fixup,"ax" .L_fixup_4x8b_copy: diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 1fbd8ee9642d..d9e16a2cf285 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -201,7 +201,7 @@ SYM_FUNC_START(csum_partial_copy_generic) movq 3*8(%rsp), %r13 movq 4*8(%rsp), %r15 addq $5*8, %rsp - ret + RET .Lshort: movl %ecx, %r10d jmp .L1 diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 65d15df6212d..0e65d00e2339 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -54,8 +54,8 @@ static void delay_loop(u64 __loops) " jnz 2b \n" "3: dec %0 \n" - : /* we don't need output */ - :"a" (loops) + : "+a" (loops) + : ); } diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index be5b5fb1598b..520897061ee0 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include @@ -10,7 +11,7 @@ asm( ".type just_return_func, @function\n" ".globl just_return_func\n" "just_return_func:\n" - " ret\n" + ASM_RET ".size just_return_func, .-just_return_func\n" ); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index fa1bc2104b32..b70d98d79a9d 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -57,7 +57,7 @@ SYM_FUNC_START(__get_user_1) 1: movzbl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_1) EXPORT_SYMBOL(__get_user_1) @@ -71,7 +71,7 @@ SYM_FUNC_START(__get_user_2) 2: movzwl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_2) EXPORT_SYMBOL(__get_user_2) @@ -85,7 +85,7 @@ SYM_FUNC_START(__get_user_4) 3: movl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_4) EXPORT_SYMBOL(__get_user_4) @@ -100,7 +100,7 @@ SYM_FUNC_START(__get_user_8) 4: movq (%_ASM_AX),%rdx xor %eax,%eax ASM_CLAC - ret + RET #else LOAD_TASK_SIZE_MINUS_N(7) cmp %_ASM_DX,%_ASM_AX @@ -112,7 +112,7 @@ SYM_FUNC_START(__get_user_8) 5: movl 4(%_ASM_AX),%ecx xor %eax,%eax ASM_CLAC - ret + RET #endif SYM_FUNC_END(__get_user_8) EXPORT_SYMBOL(__get_user_8) @@ -124,7 +124,7 @@ SYM_FUNC_START(__get_user_nocheck_1) 6: movzbl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_nocheck_1) EXPORT_SYMBOL(__get_user_nocheck_1) @@ -134,7 +134,7 @@ SYM_FUNC_START(__get_user_nocheck_2) 7: movzwl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_nocheck_2) EXPORT_SYMBOL(__get_user_nocheck_2) @@ -144,7 +144,7 @@ SYM_FUNC_START(__get_user_nocheck_4) 8: movl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_nocheck_4) EXPORT_SYMBOL(__get_user_nocheck_4) @@ -159,7 +159,7 @@ SYM_FUNC_START(__get_user_nocheck_8) #endif xor %eax,%eax ASM_CLAC - ret + RET SYM_FUNC_END(__get_user_nocheck_8) EXPORT_SYMBOL(__get_user_nocheck_8) @@ -169,7 +169,7 @@ SYM_CODE_START_LOCAL(.Lbad_get_user_clac) bad_get_user: xor %edx,%edx mov $(-EFAULT),%_ASM_AX - ret + RET SYM_CODE_END(.Lbad_get_user_clac) #ifdef CONFIG_X86_32 @@ -179,7 +179,7 @@ bad_get_user_8: xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX - ret + RET SYM_CODE_END(.Lbad_get_user_8_clac) #endif diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S index dbf8cc97b7f5..12c16c6aa44a 100644 --- a/arch/x86/lib/hweight.S +++ b/arch/x86/lib/hweight.S @@ -32,7 +32,7 @@ SYM_FUNC_START(__sw_hweight32) imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101 shrl $24, %eax # w = w_tmp >> 24 __ASM_SIZE(pop,) %__ASM_REG(dx) - ret + RET SYM_FUNC_END(__sw_hweight32) EXPORT_SYMBOL(__sw_hweight32) @@ -65,7 +65,7 @@ SYM_FUNC_START(__sw_hweight64) popq %rdx popq %rdi - ret + RET #else /* CONFIG_X86_32 */ /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ pushl %ecx @@ -77,7 +77,7 @@ SYM_FUNC_START(__sw_hweight64) addl %ecx, %eax # result popl %ecx - ret + RET #endif SYM_FUNC_END(__sw_hweight64) EXPORT_SYMBOL(__sw_hweight64) diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 12539fca75c4..b0f3b2a62ae2 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -4,7 +4,7 @@ * * Written by Masami Hiramatsu */ -#include +#include /* __ignore_sync_check__ */ /* Attribute tables are generated from opcode map */ #include "inat-tables.c" diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index c6a19c88af54..ffc8b7dcf1fe 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -928,10 +928,11 @@ static int get_seg_base_limit(struct insn *insn, struct pt_regs *regs, static int get_eff_addr_reg(struct insn *insn, struct pt_regs *regs, int *regoff, long *eff_addr) { - insn_get_modrm(insn); + int ret; - if (!insn->modrm.nbytes) - return -EINVAL; + ret = insn_get_modrm(insn); + if (ret) + return ret; if (X86_MODRM_MOD(insn->modrm.value) != 3) return -EINVAL; @@ -977,14 +978,14 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs, int *regoff, long *eff_addr) { long tmp; + int ret; if (insn->addr_bytes != 8 && insn->addr_bytes != 4) return -EINVAL; - insn_get_modrm(insn); - - if (!insn->modrm.nbytes) - return -EINVAL; + ret = insn_get_modrm(insn); + if (ret) + return ret; if (X86_MODRM_MOD(insn->modrm.value) > 2) return -EINVAL; @@ -1106,18 +1107,21 @@ static int get_eff_addr_modrm_16(struct insn *insn, struct pt_regs *regs, * @base_offset will have a register, as an offset from the base of pt_regs, * that can be used to resolve the associated segment. * - * -EINVAL on error. + * Negative value on error. */ static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs, int *base_offset, long *eff_addr) { long base, indx; int indx_offset; + int ret; if (insn->addr_bytes != 8 && insn->addr_bytes != 4) return -EINVAL; - insn_get_modrm(insn); + ret = insn_get_modrm(insn); + if (ret) + return ret; if (!insn->modrm.nbytes) return -EINVAL; @@ -1125,7 +1129,9 @@ static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs, if (X86_MODRM_MOD(insn->modrm.value) > 2) return -EINVAL; - insn_get_sib(insn); + ret = insn_get_sib(insn); + if (ret) + return ret; if (!insn->sib.nbytes) return -EINVAL; @@ -1194,8 +1200,8 @@ static void __user *get_addr_ref_16(struct insn *insn, struct pt_regs *regs) short eff_addr; long tmp; - insn_get_modrm(insn); - insn_get_displacement(insn); + if (insn_get_displacement(insn)) + goto out; if (insn->addr_bytes != 2) goto out; @@ -1492,7 +1498,7 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_IN } /** - * insn_decode() - Decode an instruction + * insn_decode_from_regs() - Decode an instruction * @insn: Structure to store decoded instruction * @regs: Structure with register values as seen when entering kernel mode * @buf: Buffer containing the instruction bytes @@ -1505,8 +1511,8 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_IN * * True if instruction was decoded, False otherwise. */ -bool insn_decode(struct insn *insn, struct pt_regs *regs, - unsigned char buf[MAX_INSN_SIZE], int buf_size) +bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE], int buf_size) { int seg_defs; @@ -1529,7 +1535,9 @@ bool insn_decode(struct insn *insn, struct pt_regs *regs, insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs); insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs); - insn_get_length(insn); + if (insn_get_length(insn)) + return false; + if (buf_size < insn->length) return false; diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 404279563891..24e89239dcca 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -10,10 +10,13 @@ #else #include #endif -#include -#include +#include /*__ignore_sync_check__ */ +#include /* __ignore_sync_check__ */ -#include +#include +#include + +#include /* __ignore_sync_check__ */ /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ @@ -97,8 +100,12 @@ static void insn_get_emulate_prefix(struct insn *insn) * Populates the @insn->prefixes bitmap, and updates @insn->next_byte * to point to the (first) opcode. No effect if @insn->prefixes.got * is already set. + * + * * Returns: + * 0: on success + * < 0: on error */ -void insn_get_prefixes(struct insn *insn) +int insn_get_prefixes(struct insn *insn) { struct insn_field *prefixes = &insn->prefixes; insn_attr_t attr; @@ -106,7 +113,7 @@ void insn_get_prefixes(struct insn *insn) int i, nb; if (prefixes->got) - return; + return 0; insn_get_emulate_prefix(insn); @@ -217,8 +224,10 @@ void insn_get_prefixes(struct insn *insn) prefixes->got = 1; + return 0; + err_out: - return; + return -ENODATA; } /** @@ -230,16 +239,25 @@ void insn_get_prefixes(struct insn *insn) * If necessary, first collects any preceding (prefix) bytes. * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got * is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_opcode(struct insn *insn) +int insn_get_opcode(struct insn *insn) { struct insn_field *opcode = &insn->opcode; + int pfx_id, ret; insn_byte_t op; - int pfx_id; + if (opcode->got) - return; - if (!insn->prefixes.got) - insn_get_prefixes(insn); + return 0; + + if (!insn->prefixes.got) { + ret = insn_get_prefixes(insn); + if (ret) + return ret; + } /* Get first opcode */ op = get_next(insn_byte_t, insn); @@ -254,9 +272,13 @@ void insn_get_opcode(struct insn *insn) insn->attr = inat_get_avx_attribute(op, m, p); if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || (!inat_accept_vex(insn->attr) && - !inat_is_group(insn->attr))) - insn->attr = 0; /* This instruction is bad */ - goto end; /* VEX has only 1 byte for opcode */ + !inat_is_group(insn->attr))) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } + /* VEX has only 1 byte for opcode */ + goto end; } insn->attr = inat_get_opcode_attribute(op); @@ -267,13 +289,18 @@ void insn_get_opcode(struct insn *insn) pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); } - if (inat_must_vex(insn->attr)) - insn->attr = 0; /* This instruction is bad */ + + if (inat_must_vex(insn->attr)) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } end: opcode->got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -283,15 +310,25 @@ void insn_get_opcode(struct insn *insn) * Populates @insn->modrm and updates @insn->next_byte to point past the * ModRM byte, if any. If necessary, first collects the preceding bytes * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_modrm(struct insn *insn) +int insn_get_modrm(struct insn *insn) { struct insn_field *modrm = &insn->modrm; insn_byte_t pfx_id, mod; + int ret; + if (modrm->got) - return; - if (!insn->opcode.got) - insn_get_opcode(insn); + return 0; + + if (!insn->opcode.got) { + ret = insn_get_opcode(insn); + if (ret) + return ret; + } if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); @@ -301,17 +338,22 @@ void insn_get_modrm(struct insn *insn) pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, insn->attr); - if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) - insn->attr = 0; /* This is bad */ + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { + /* Bad insn */ + insn->attr = 0; + return -EINVAL; + } } } if (insn->x86_64 && inat_is_force64(insn->attr)) insn->opnd_bytes = 8; + modrm->got = 1; + return 0; err_out: - return; + return -ENODATA; } @@ -325,11 +367,16 @@ void insn_get_modrm(struct insn *insn) int insn_rip_relative(struct insn *insn) { struct insn_field *modrm = &insn->modrm; + int ret; if (!insn->x86_64) return 0; - if (!modrm->got) - insn_get_modrm(insn); + + if (!modrm->got) { + ret = insn_get_modrm(insn); + if (ret) + return 0; + } /* * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. @@ -343,15 +390,25 @@ int insn_rip_relative(struct insn *insn) * * If necessary, first collects the instruction up to and including the * ModRM byte. + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_sib(struct insn *insn) +int insn_get_sib(struct insn *insn) { insn_byte_t modrm; + int ret; if (insn->sib.got) - return; - if (!insn->modrm.got) - insn_get_modrm(insn); + return 0; + + if (!insn->modrm.got) { + ret = insn_get_modrm(insn); + if (ret) + return ret; + } + if (insn->modrm.nbytes) { modrm = (insn_byte_t)insn->modrm.value; if (insn->addr_bytes != 2 && @@ -362,8 +419,10 @@ void insn_get_sib(struct insn *insn) } insn->sib.got = 1; + return 0; + err_out: - return; + return -ENODATA; } @@ -374,15 +433,25 @@ void insn_get_sib(struct insn *insn) * If necessary, first collects the instruction up to and including the * SIB byte. * Displacement value is sign-expanded. + * + * * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_displacement(struct insn *insn) +int insn_get_displacement(struct insn *insn) { insn_byte_t mod, rm, base; + int ret; if (insn->displacement.got) - return; - if (!insn->sib.got) - insn_get_sib(insn); + return 0; + + if (!insn->sib.got) { + ret = insn_get_sib(insn); + if (ret) + return ret; + } + if (insn->modrm.nbytes) { /* * Interpreting the modrm byte: @@ -425,9 +494,10 @@ void insn_get_displacement(struct insn *insn) } out: insn->displacement.got = 1; + return 0; err_out: - return; + return -ENODATA; } /* Decode moffset16/32/64. Return 0 if failed */ @@ -538,20 +608,30 @@ static int __get_immptr(struct insn *insn) } /** - * insn_get_immediate() - Get the immediates of instruction + * insn_get_immediate() - Get the immediate in an instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * displacement bytes. * Basically, most of immediates are sign-expanded. Unsigned-value can be - * get by bit masking with ((1 << (nbytes * 8)) - 1) + * computed by bit masking with ((1 << (nbytes * 8)) - 1) + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_immediate(struct insn *insn) +int insn_get_immediate(struct insn *insn) { + int ret; + if (insn->immediate.got) - return; - if (!insn->displacement.got) - insn_get_displacement(insn); + return 0; + + if (!insn->displacement.got) { + ret = insn_get_displacement(insn); + if (ret) + return ret; + } if (inat_has_moffset(insn->attr)) { if (!__get_moffset(insn)) @@ -604,9 +684,10 @@ void insn_get_immediate(struct insn *insn) } done: insn->immediate.got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -615,13 +696,58 @@ void insn_get_immediate(struct insn *insn) * * If necessary, first collects the instruction up to and including the * immediates bytes. - */ -void insn_get_length(struct insn *insn) + * + * Returns: + * - 0 on success + * - < 0 on error +*/ +int insn_get_length(struct insn *insn) { + int ret; + if (insn->length) - return; - if (!insn->immediate.got) - insn_get_immediate(insn); + return 0; + + if (!insn->immediate.got) { + ret = insn_get_immediate(insn); + if (ret) + return ret; + } + insn->length = (unsigned char)((unsigned long)insn->next_byte - (unsigned long)insn->kaddr); + + return 0; +} + +/** + * insn_decode() - Decode an x86 instruction + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @buf_len: length of the insn buffer at @kaddr + * @m: insn mode, see enum insn_mode + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. + */ +int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) +{ + int ret; + +/* #define INSN_MODE_KERN -1 __ignore_sync_check__ mode is only valid in the kernel */ + + if (m == INSN_MODE_KERN) + insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); + else + insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); + + ret = insn_get_length(insn); + if (ret) + return ret; + + if (insn_complete(insn)) + return 0; + + return -EINVAL; } diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index cb5a1964506b..a1f9416bf67a 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -11,5 +11,5 @@ SYM_FUNC_START(__iowrite32_copy) movl %edx,%ecx rep movsd - ret + RET SYM_FUNC_END(__iowrite32_copy) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 1e299ac73c86..59cf2343f3d9 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include .pushsection .noinstr.text, "ax" @@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy) rep movsq movl %edx, %ecx rep movsb - ret + RET SYM_FUNC_END(memcpy) SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(memcpy) @@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb - ret + RET SYM_FUNC_END(memcpy_erms) SYM_FUNC_START_LOCAL(memcpy_orig) @@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq %r9, 1*8(%rdi) movq %r10, -2*8(%rdi, %rdx) movq %r11, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_16bytes: cmpl $8, %edx @@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq -1*8(%rsi, %rdx), %r9 movq %r8, 0*8(%rdi) movq %r9, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_8bytes: cmpl $4, %edx @@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movl -4(%rsi, %rdx), %r8d movl %ecx, (%rdi) movl %r8d, -4(%rdi, %rdx) - retq + RET .p2align 4 .Lless_3bytes: subl $1, %edx @@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movb %cl, (%rdi) .Lend: - retq + RET SYM_FUNC_END(memcpy_orig) .popsection diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 41902fe8b859..4b8ee3a2fcc3 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -8,7 +8,7 @@ */ #include #include -#include +#include #include #undef memmove @@ -40,7 +40,7 @@ SYM_FUNC_START(__memmove) /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS + ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS /* * movsq instruction have many startup latency @@ -205,7 +205,12 @@ SYM_FUNC_START(__memmove) movb (%rsi), %r11b movb %r11b, (%rdi) 13: - retq + RET + +.Lmemmove_erms: + movq %rdx, %rcx + rep movsb + RET SYM_FUNC_END(__memmove) SYM_FUNC_END_ALIAS(memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 0bfd26e4ca9e..d624f2bc42f1 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -3,7 +3,7 @@ #include #include -#include +#include #include /* @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) movl %edx,%ecx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(__memset) SYM_FUNC_END_ALIAS(memset) EXPORT_SYMBOL(memset) @@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms) movq %rdx,%rcx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(memset_erms) SYM_FUNC_START_LOCAL(memset_orig) @@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig) .Lende: movq %r10,%rax - ret + RET .Lbad_alignment: cmpq $7,%rdx diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index a2b9caa5274c..ebd259f31496 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -35,7 +35,7 @@ SYM_FUNC_START(\op\()_safe_regs) movl %edi, 28(%r10) popq %r12 popq %rbx - ret + RET 3: movl $-EIO, %r11d jmp 2b @@ -77,7 +77,7 @@ SYM_FUNC_START(\op\()_safe_regs) popl %esi popl %ebp popl %ebx - ret + RET 3: movl $-EIO, 4(%esp) jmp 2b diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 0ea344c5ea43..ecb2049c1273 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -52,7 +52,7 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL) 1: movb %al,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC - ret + RET SYM_FUNC_END(__put_user_1) EXPORT_SYMBOL(__put_user_1) EXPORT_SYMBOL(__put_user_nocheck_1) @@ -66,7 +66,7 @@ SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL) 2: movw %ax,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC - ret + RET SYM_FUNC_END(__put_user_2) EXPORT_SYMBOL(__put_user_2) EXPORT_SYMBOL(__put_user_nocheck_2) @@ -80,7 +80,7 @@ SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL) 3: movl %eax,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC - ret + RET SYM_FUNC_END(__put_user_4) EXPORT_SYMBOL(__put_user_4) EXPORT_SYMBOL(__put_user_nocheck_4) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b4c43a9b1483..1221bb099afb 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -4,33 +4,37 @@ #include #include #include -#include +#include #include #include #include #include -.macro THUNK reg .section .text.__x86.indirect_thunk - .align 32 -SYM_FUNC_START(__x86_indirect_thunk_\reg) - JMP_NOSPEC \reg -SYM_FUNC_END(__x86_indirect_thunk_\reg) - -SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg) +.macro RETPOLINE reg ANNOTATE_INTRA_FUNCTION_CALL - call .Ldo_rop_\@ + call .Ldo_rop_\@ .Lspec_trap_\@: UNWIND_HINT_EMPTY pause lfence - jmp .Lspec_trap_\@ + jmp .Lspec_trap_\@ .Ldo_rop_\@: - mov %\reg, (%_ASM_SP) - UNWIND_HINT_RET_OFFSET - ret -SYM_FUNC_END(__x86_retpoline_\reg) + mov %\reg, (%_ASM_SP) + UNWIND_HINT_FUNC + RET +.endm + +.macro THUNK reg + + .align RETPOLINE_THUNK_SIZE +SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY + + ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ + __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) .endm @@ -48,16 +52,90 @@ SYM_FUNC_END(__x86_retpoline_\reg) #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) -#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg) -#undef GEN + .align RETPOLINE_THUNK_SIZE +SYM_CODE_START(__x86_indirect_thunk_array) + #define GEN(reg) THUNK reg #include - #undef GEN + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_END(__x86_indirect_thunk_array) + #define GEN(reg) EXPORT_THUNK(reg) #include - #undef GEN -#define GEN(reg) EXPORT_RETPOLINE(reg) -#include + +/* + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ +#ifdef CONFIG_RETHUNK + + .section .text.__x86.return_thunk + +/* + * Safety details here pertain to the AMD Zen{1,2} microarchitecture: + * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * alignment within the BTB. + * 2) The instruction at zen_untrain_ret must contain, and not + * end with, the 0xc3 byte of the RET. + * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread + * from re-poisioning the BTB prediction. + */ + .align 64 + .skip 63, 0xcc +SYM_FUNC_START_NOALIGN(zen_untrain_ret); + + /* + * As executed from zen_untrain_ret, this is: + * + * TEST $0xcc, %bl + * LFENCE + * JMP __x86_return_thunk + * + * Executing the TEST instruction has a side effect of evicting any BTB + * prediction (potentially attacker controlled) attached to the RET, as + * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + */ + .byte 0xf6 + + /* + * As executed from __x86_return_thunk, this is a plain RET. + * + * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. + * + * We subsequently jump backwards and architecturally execute the RET. + * This creates a correct BTB prediction (type=ret), but in the + * meantime we suffer Straight Line Speculation (because the type was + * no branch) which is halted by the INT3. + * + * With SMT enabled and STIBP active, a sibling thread cannot poison + * RET's prediction to a type of its choice, but can evict the + * prediction due to competitive sharing. If the prediction is + * evicted, __x86_return_thunk will suffer Straight Line Speculation + * which will be contained safely by the INT3. + */ +SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) + ret + int3 +SYM_CODE_END(__x86_return_thunk) + + /* + * Ensure the TEST decoding / BTB invalidation is complete. + */ + lfence + + /* + * Jump back and execute the RET in the middle of the TEST instruction. + * INT3 is for SLS protection. + */ + jmp __x86_return_thunk + int3 +SYM_FUNC_END(zen_untrain_ret) +__EXPORT_THUNK(zen_untrain_ret) + +EXPORT_SYMBOL(__x86_return_thunk) + +#endif /* CONFIG_RETHUNK */ diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S index 951da2ad54bb..8c270ab415be 100644 --- a/arch/x86/math-emu/div_Xsig.S +++ b/arch/x86/math-emu/div_Xsig.S @@ -341,7 +341,7 @@ L_exit: popl %esi leave - ret + RET #ifdef PARANOID diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S index d047d1816abe..637439bfefa4 100644 --- a/arch/x86/math-emu/div_small.S +++ b/arch/x86/math-emu/div_small.S @@ -44,5 +44,5 @@ SYM_FUNC_START(FPU_div_small) popl %esi leave - ret + RET SYM_FUNC_END(FPU_div_small) diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S index 4afc7b1fa6e9..54a031b66142 100644 --- a/arch/x86/math-emu/mul_Xsig.S +++ b/arch/x86/math-emu/mul_Xsig.S @@ -62,7 +62,7 @@ SYM_FUNC_START(mul32_Xsig) popl %esi leave - ret + RET SYM_FUNC_END(mul32_Xsig) @@ -115,7 +115,7 @@ SYM_FUNC_START(mul64_Xsig) popl %esi leave - ret + RET SYM_FUNC_END(mul64_Xsig) @@ -175,5 +175,5 @@ SYM_FUNC_START(mul_Xsig_Xsig) popl %esi leave - ret + RET SYM_FUNC_END(mul_Xsig_Xsig) diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S index 702315eecb86..35fd723fc0df 100644 --- a/arch/x86/math-emu/polynom_Xsig.S +++ b/arch/x86/math-emu/polynom_Xsig.S @@ -133,5 +133,5 @@ L_accum_done: popl %edi popl %esi leave - ret + RET SYM_FUNC_END(polynomial_Xsig) diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S index cad1d60b1e84..594936eeed67 100644 --- a/arch/x86/math-emu/reg_norm.S +++ b/arch/x86/math-emu/reg_norm.S @@ -72,7 +72,7 @@ L_exit_valid: L_exit: popl %ebx leave - ret + RET L_zero: @@ -138,7 +138,7 @@ L_exit_nuo_valid: popl %ebx leave - ret + RET L_exit_nuo_zero: movl TAG_Zero,%eax @@ -146,5 +146,5 @@ L_exit_nuo_zero: popl %ebx leave - ret + RET SYM_FUNC_END(FPU_normalize_nuo) diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S index 11a1f798451b..8bdbdcee7456 100644 --- a/arch/x86/math-emu/reg_round.S +++ b/arch/x86/math-emu/reg_round.S @@ -437,7 +437,7 @@ fpu_Arith_exit: popl %edi popl %esi leave - ret + RET /* diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S index 9c9e2c810afe..07247287a3af 100644 --- a/arch/x86/math-emu/reg_u_add.S +++ b/arch/x86/math-emu/reg_u_add.S @@ -164,6 +164,6 @@ L_exit: popl %edi popl %esi leave - ret + RET #endif /* PARANOID */ SYM_FUNC_END(FPU_u_add) diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S index e2fb5c2644c5..b5a41e2fc484 100644 --- a/arch/x86/math-emu/reg_u_div.S +++ b/arch/x86/math-emu/reg_u_div.S @@ -468,7 +468,7 @@ L_exit: popl %esi leave - ret + RET #endif /* PARANOID */ SYM_FUNC_END(FPU_u_div) diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S index 0c779c87ac5b..e2588b24b8c2 100644 --- a/arch/x86/math-emu/reg_u_mul.S +++ b/arch/x86/math-emu/reg_u_mul.S @@ -144,7 +144,7 @@ L_exit: popl %edi popl %esi leave - ret + RET #endif /* PARANOID */ SYM_FUNC_END(FPU_u_mul) diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S index e9bb7c248649..4c900c29e4ff 100644 --- a/arch/x86/math-emu/reg_u_sub.S +++ b/arch/x86/math-emu/reg_u_sub.S @@ -270,5 +270,5 @@ L_exit: popl %edi popl %esi leave - ret + RET SYM_FUNC_END(FPU_u_sub) diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S index d9d7de8dbd7b..126c40473bad 100644 --- a/arch/x86/math-emu/round_Xsig.S +++ b/arch/x86/math-emu/round_Xsig.S @@ -78,7 +78,7 @@ L_exit: popl %esi popl %ebx leave - ret + RET SYM_FUNC_END(round_Xsig) @@ -138,5 +138,5 @@ L_n_exit: popl %esi popl %ebx leave - ret + RET SYM_FUNC_END(norm_Xsig) diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S index 726af985f758..f726bf6f6396 100644 --- a/arch/x86/math-emu/shr_Xsig.S +++ b/arch/x86/math-emu/shr_Xsig.S @@ -45,7 +45,7 @@ SYM_FUNC_START(shr_Xsig) popl %ebx popl %esi leave - ret + RET L_more_than_31: cmpl $64,%ecx @@ -61,7 +61,7 @@ L_more_than_31: movl $0,8(%esi) popl %esi leave - ret + RET L_more_than_63: cmpl $96,%ecx @@ -76,7 +76,7 @@ L_more_than_63: movl %edx,8(%esi) popl %esi leave - ret + RET L_more_than_95: xorl %eax,%eax @@ -85,5 +85,5 @@ L_more_than_95: movl %eax,8(%esi) popl %esi leave - ret + RET SYM_FUNC_END(shr_Xsig) diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S index 4fc89174caf0..f608a28a4c43 100644 --- a/arch/x86/math-emu/wm_shrx.S +++ b/arch/x86/math-emu/wm_shrx.S @@ -55,7 +55,7 @@ SYM_FUNC_START(FPU_shrx) popl %ebx popl %esi leave - ret + RET L_more_than_31: cmpl $64,%ecx @@ -70,7 +70,7 @@ L_more_than_31: movl $0,4(%esi) popl %esi leave - ret + RET L_more_than_63: cmpl $96,%ecx @@ -84,7 +84,7 @@ L_more_than_63: movl %edx,4(%esi) popl %esi leave - ret + RET L_more_than_95: xorl %eax,%eax @@ -92,7 +92,7 @@ L_more_than_95: movl %eax,4(%esi) popl %esi leave - ret + RET SYM_FUNC_END(FPU_shrx) @@ -146,7 +146,7 @@ SYM_FUNC_START(FPU_shrxs) popl %ebx popl %esi leave - ret + RET /* Shift by [0..31] bits */ Ls_less_than_32: @@ -163,7 +163,7 @@ Ls_less_than_32: popl %ebx popl %esi leave - ret + RET /* Shift by [64..95] bits */ Ls_more_than_63: @@ -189,7 +189,7 @@ Ls_more_than_63: popl %ebx popl %esi leave - ret + RET Ls_more_than_95: /* Shift by [96..inf) bits */ @@ -203,5 +203,5 @@ Ls_more_than_95: popl %ebx popl %esi leave - ret + RET SYM_FUNC_END(FPU_shrxs) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 155c95dc1772..88a1e0d60170 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -78,10 +78,20 @@ static uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; -/* Check that the write-protect PAT entry is set for write-protect */ +/* + * Check that the write-protect PAT entry is set for write-protect. + * To do this without making assumptions how PAT has been set up (Xen has + * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache + * mode via the __cachemode2pte_tbl[] into protection bits (those protection + * bits will select a cache mode of WP or better), and then translate the + * protection bits back into the cache mode using __pte2cm_idx() and the + * __pte2cachemode_tbl[] array. This will return the really used cache mode. + */ bool x86_has_pat_wp(void) { - return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP; + uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP]; + + return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP; } enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 7a84fc8bc5c3..145b67299ab6 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -65,7 +65,10 @@ SYM_FUNC_START(sme_encrypt_execute) movq %rbp, %rsp /* Restore original stack pointer */ pop %rbp + /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE ret + int3 SYM_FUNC_END(sme_encrypt_execute) SYM_FUNC_START(__enc_copy) @@ -151,6 +154,9 @@ SYM_FUNC_START(__enc_copy) pop %r12 pop %r15 + /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE ret + int3 .L__enc_copy_end: SYM_FUNC_END(__enc_copy) diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 232932bda4e5..f9c53a710740 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -101,7 +101,7 @@ int pat_debug_enable; static int __init pat_debug_setup(char *str) { pat_debug_enable = 1; - return 0; + return 1; } __setup("debugpat", pat_debug_setup); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 610b43529148..547ec83794c0 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -15,7 +15,6 @@ #include #include #include -#include static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -213,6 +212,14 @@ static void jit_fill_hole(void *area, unsigned int size) struct jit_context { int cleanup_addr; /* Epilogue code offset */ + + /* + * Program specific offsets of labels in the code; these rely on the + * JIT doing at least 2 passes, recording the position on the first + * pass, only to generate the correct offset on the second pass. + */ + int tail_call_direct_label; + int tail_call_indirect_label; }; /* Maximum number of bytes emitted while JITing one eBPF insn */ @@ -372,20 +379,40 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); } -static int get_pop_bytes(bool *callee_regs_used) +#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8) + +static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { - int bytes = 0; + u8 *prog = *pprog; + int cnt = 0; - if (callee_regs_used[3]) - bytes += 2; - if (callee_regs_used[2]) - bytes += 2; - if (callee_regs_used[1]) - bytes += 2; - if (callee_regs_used[0]) - bytes += 1; +#ifdef CONFIG_RETPOLINE + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + EMIT_LFENCE(); + EMIT2(0xFF, 0xE0 + reg); + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); + } else +#endif + EMIT2(0xFF, 0xE0 + reg); - return bytes; + *pprog = prog; +} + +static void emit_return(u8 **pprog, u8 *ip) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + emit_jump(&prog, &__x86_return_thunk, ip); + } else { + EMIT1(0xC3); /* ret */ + if (IS_ENABLED(CONFIG_SLS)) + EMIT1(0xCC); /* int3 */ + } + + *pprog = prog; } /* @@ -403,30 +430,12 @@ static int get_pop_bytes(bool *callee_regs_used) * out: */ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, - u32 stack_depth) + u32 stack_depth, u8 *ip, + struct jit_context *ctx) { int tcc_off = -4 - round_up(stack_depth, 8); - u8 *prog = *pprog; - int pop_bytes = 0; - int off1 = 42; - int off2 = 31; - int off3 = 9; - int cnt = 0; - - /* count the additional bytes used for popping callee regs from stack - * that need to be taken into account for each of the offsets that - * are used for bailing out of the tail call - */ - pop_bytes = get_pop_bytes(callee_regs_used); - off1 += pop_bytes; - off2 += pop_bytes; - off3 += pop_bytes; - - if (stack_depth) { - off1 += 7; - off2 += 7; - off3 += 7; - } + u8 *prog = *pprog, *start = *pprog; + int cnt = 0, offset; /* * rdi - pointer to ctx @@ -441,8 +450,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); -#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */ - EMIT2(X86_JBE, OFFSET1); /* jbe out */ + + offset = ctx->tail_call_indirect_label - (prog + 2 - start); + EMIT2(X86_JBE, offset); /* jbe out */ /* * if (tail_call_cnt > MAX_TAIL_CALL_CNT) @@ -450,8 +460,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, */ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE) - EMIT2(X86_JA, OFFSET2); /* ja out */ + + offset = ctx->tail_call_indirect_label - (prog + 2 - start); + EMIT2(X86_JA, offset); /* ja out */ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ @@ -464,12 +475,11 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, * goto out; */ EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */ -#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE) - EMIT2(X86_JE, OFFSET3); /* je out */ - *pprog = prog; - pop_callee_regs(pprog, callee_regs_used); - prog = *pprog; + offset = ctx->tail_call_indirect_label - (prog + 2 - start); + EMIT2(X86_JE, offset); /* je out */ + + pop_callee_regs(&prog, callee_regs_used); EMIT1(0x58); /* pop rax */ if (stack_depth) @@ -486,42 +496,21 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, * rdi == ctx (1st arg) * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET */ - RETPOLINE_RCX_BPF_JIT(); + emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start)); /* out: */ + ctx->tail_call_indirect_label = prog - start; *pprog = prog; } static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, - u8 **pprog, int addr, u8 *image, - bool *callee_regs_used, u32 stack_depth) + u8 **pprog, u8 *ip, + bool *callee_regs_used, u32 stack_depth, + struct jit_context *ctx) { int tcc_off = -4 - round_up(stack_depth, 8); - u8 *prog = *pprog; - int pop_bytes = 0; - int off1 = 20; - int poke_off; - int cnt = 0; - - /* count the additional bytes used for popping callee regs to stack - * that need to be taken into account for jump offset that is used for - * bailing out from of the tail call when limit is reached - */ - pop_bytes = get_pop_bytes(callee_regs_used); - off1 += pop_bytes; - - /* - * total bytes for: - * - nop5/ jmpq $off - * - pop callee regs - * - sub rsp, $val if depth > 0 - * - pop rax - */ - poke_off = X86_PATCH_SIZE + pop_bytes + 1; - if (stack_depth) { - poke_off += 7; - off1 += 7; - } + u8 *prog = *pprog, *start = *pprog; + int cnt = 0, offset; /* * if (tail_call_cnt > MAX_TAIL_CALL_CNT) @@ -529,28 +518,30 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, */ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ - EMIT2(X86_JA, off1); /* ja out */ + + offset = ctx->tail_call_direct_label - (prog + 2 - start); + EMIT2(X86_JA, offset); /* ja out */ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ - poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE); + poke->tailcall_bypass = ip + (prog - start); poke->adj_off = X86_TAIL_CALL_OFFSET; - poke->tailcall_target = image + (addr - X86_PATCH_SIZE); + poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE; poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE; emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, poke->tailcall_bypass); - *pprog = prog; - pop_callee_regs(pprog, callee_regs_used); - prog = *pprog; + pop_callee_regs(&prog, callee_regs_used); EMIT1(0x58); /* pop rax */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE); prog += X86_PATCH_SIZE; + /* out: */ + ctx->tail_call_direct_label = prog - start; *pprog = prog; } @@ -1141,8 +1132,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, /* speculation barrier */ case BPF_ST | BPF_NOSPEC: if (boot_cpu_has(X86_FEATURE_XMM2)) - /* Emit 'lfence' */ - EMIT3(0x0F, 0xAE, 0xE8); + EMIT_LFENCE(); break; /* ST: *(u8*)(dst_reg + off) = imm */ @@ -1258,8 +1248,9 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP | BPF_CALL: func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { + /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ EMIT3_off32(0x48, 0x8B, 0x85, - -(bpf_prog->aux->stack_depth + 8)); + -round_up(bpf_prog->aux->stack_depth, 8) - 8); if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7)) return -EINVAL; } else { @@ -1271,13 +1262,16 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP | BPF_TAIL_CALL: if (imm32) emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], - &prog, addrs[i], image, + &prog, image + addrs[i - 1], callee_regs_used, - bpf_prog->aux->stack_depth); + bpf_prog->aux->stack_depth, + ctx); else emit_bpf_tail_call_indirect(&prog, callee_regs_used, - bpf_prog->aux->stack_depth); + bpf_prog->aux->stack_depth, + image + addrs[i - 1], + ctx); break; /* cond jump */ @@ -1462,7 +1456,7 @@ xadd: if (is_imm8(insn->off)) ctx->cleanup_addr = proglen; pop_callee_regs(&prog, callee_regs_used); EMIT1(0xC9); /* leave */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; default: @@ -1903,7 +1897,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, prog); /* Make sure the trampoline generation logic doesn't overflow */ if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { ret = -EFAULT; @@ -1916,26 +1910,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i return ret; } -static int emit_fallback_jump(u8 **pprog) -{ - u8 *prog = *pprog; - int err = 0; - -#ifdef CONFIG_RETPOLINE - /* Note that this assumes the the compiler uses external - * thunks for indirect calls. Both clang and GCC use the same - * naming convention for external thunks. - */ - err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog); -#else - int cnt = 0; - - EMIT2(0xFF, 0xE2); /* jmp rdx */ -#endif - *pprog = prog; - return err; -} - static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) { u8 *jg_reloc, *prog = *pprog; @@ -1957,9 +1931,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) if (err) return err; - err = emit_fallback_jump(&prog); /* jmp thunk/indirect */ - if (err) - return err; + emit_indirect_jump(&prog, 2 /* rdx */, prog); *pprog = prog; return 0; diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 4bd0f98df700..622af951220c 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -15,6 +15,7 @@ #include #include #include +#include #include /* @@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth) *pprog = prog; } +static int emit_jmp_edx(u8 **pprog, u8 *ip) +{ + u8 *prog = *pprog; + int cnt = 0; + +#ifdef CONFIG_RETPOLINE + EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5)); +#else + EMIT2(0xFF, 0xE2); +#endif + *pprog = prog; + + return cnt; +} + /* * Generate the following code: * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... @@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth) * goto *(prog->bpf_func + prologue_size); * out: */ -static void emit_bpf_tail_call(u8 **pprog) +static void emit_bpf_tail_call(u8 **pprog, u8 *ip) { u8 *prog = *pprog; int cnt = 0; @@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **pprog) * eax == ctx (1st arg) * edx == prog->bpf_func + prologue_size */ - RETPOLINE_EDX_BPF_JIT(); + cnt += emit_jmp_edx(&prog, ip + cnt); if (jmp_label1 == -1) jmp_label1 = cnt; @@ -1929,7 +1945,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; } case BPF_JMP | BPF_TAIL_CALL: - emit_bpf_tail_call(&prog); + emit_bpf_tail_call(&prog, image + addrs[i - 1]); break; /* cond jump */ diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index 09ec84f6ef51..f3cfdb1c9a35 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -56,5 +56,5 @@ SYM_FUNC_START(efi_call_svam) movl 16(%esp), %ebx leave - ret + RET SYM_FUNC_END(efi_call_svam) diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 90380a17ab23..2206b8bc47b8 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -23,5 +23,5 @@ SYM_FUNC_START(__efi_call) mov %rsi, %rcx CALL_NOSPEC rdi leave - ret + RET SYM_FUNC_END(__efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 26f0da238c1c..9eac088ef4de 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -22,6 +22,7 @@ #include #include #include +#include .text .code64 @@ -63,7 +64,9 @@ SYM_CODE_START(__efi64_thunk) 1: movq 24(%rsp), %rsp pop %rbx pop %rbp - retq + ANNOTATE_UNRET_SAFE + ret + int3 .code32 2: pushl $__KERNEL_CS diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S index 75f4faff8468..3a5abffe5660 100644 --- a/arch/x86/platform/olpc/xo1-wakeup.S +++ b/arch/x86/platform/olpc/xo1-wakeup.S @@ -77,7 +77,7 @@ save_registers: pushfl popl saved_context_eflags - ret + RET restore_registers: movl saved_context_ebp, %ebp @@ -88,7 +88,7 @@ restore_registers: pushl saved_context_eflags popfl - ret + RET SYM_CODE_START(do_olpc_suspend_lowlevel) call save_processor_state @@ -109,7 +109,7 @@ ret_point: call restore_registers call restore_processor_state - ret + RET SYM_CODE_END(do_olpc_suspend_lowlevel) .data diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index 8786653ad3c0..5606a15cf9a1 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -32,7 +32,7 @@ SYM_FUNC_START(swsusp_arch_suspend) FRAME_BEGIN call swsusp_save FRAME_END - ret + RET SYM_FUNC_END(swsusp_arch_suspend) SYM_CODE_START(restore_image) @@ -108,5 +108,5 @@ SYM_FUNC_START(restore_registers) /* tell the hibernation core that we've just restored the memory */ movl %eax, in_suspend - ret + RET SYM_FUNC_END(restore_registers) diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 7918b8415f13..3ae7a3d7d61e 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -49,7 +49,7 @@ SYM_FUNC_START(swsusp_arch_suspend) FRAME_BEGIN call swsusp_save FRAME_END - ret + RET SYM_FUNC_END(swsusp_arch_suspend) SYM_CODE_START(restore_image) @@ -143,5 +143,5 @@ SYM_FUNC_START(restore_registers) /* tell the hibernation core that we've just restored the memory */ movq %rax, in_suspend(%rip) - ret + RET SYM_FUNC_END(restore_registers) diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S index 13f118dec74f..aed782ab7721 100644 --- a/arch/x86/um/checksum_32.S +++ b/arch/x86/um/checksum_32.S @@ -110,7 +110,7 @@ csum_partial: 7: popl %ebx popl %esi - ret + RET #else @@ -208,7 +208,7 @@ csum_partial: 80: popl %ebx popl %esi - ret + RET #endif EXPORT_SYMBOL(csum_partial) diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 3ee234b6234d..255a44dd415a 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -23,9 +23,11 @@ static long write_ldt_entry(struct mm_id *mm_idp, int func, { long res; void *stub_addr; + + BUILD_BUG_ON(sizeof(*desc) % sizeof(long)); + res = syscall_stub_data(mm_idp, (unsigned long *)desc, - (sizeof(*desc) + sizeof(long) - 1) & - ~(sizeof(long) - 1), + sizeof(*desc) / sizeof(long), addr, &stub_addr); if (!res) { unsigned long args[] = { func, diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S index 62eaf8c80e04..2d991ddbcca5 100644 --- a/arch/x86/um/setjmp_32.S +++ b/arch/x86/um/setjmp_32.S @@ -34,7 +34,7 @@ kernel_setjmp: movl %esi,12(%edx) movl %edi,16(%edx) movl %ecx,20(%edx) # Return address - ret + RET .size kernel_setjmp,.-kernel_setjmp diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S index 1b5d40d4ff46..b46acb6a8ebd 100644 --- a/arch/x86/um/setjmp_64.S +++ b/arch/x86/um/setjmp_64.S @@ -33,7 +33,7 @@ kernel_setjmp: movq %r14,40(%rdi) movq %r15,48(%rdi) movq %rsi,56(%rdi) # Return address - ret + RET .size kernel_setjmp,.-kernel_setjmp diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h index 8a7d5e1da98e..1e6875b4ffd8 100644 --- a/arch/x86/um/shared/sysdep/syscalls_64.h +++ b/arch/x86/um/shared/sysdep/syscalls_64.h @@ -10,13 +10,12 @@ #include #include -typedef long syscall_handler_t(void); +typedef long syscall_handler_t(long, long, long, long, long, long); extern syscall_handler_t *sys_call_table[]; #define EXECUTE_SYSCALL(syscall, regs) \ - (((long (*)(long, long, long, long, long, long)) \ - (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ + (((*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ UPT_SYSCALL_ARG2(®s->regs), \ UPT_SYSCALL_ARG3(®s->regs), \ UPT_SYSCALL_ARG4(®s->regs), \ diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index fc5c5ba4aacb..40b5779fce21 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_xen-asm.o := y ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8bfc10330107..1f80dd3a2dd4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -922,7 +922,7 @@ void xen_enable_sysenter(void) if (!boot_cpu_has(sysenter_feature)) return; - ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); + ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); if(ret != 0) setup_clear_cpu_cap(sysenter_feature); } @@ -931,7 +931,7 @@ void xen_enable_syscall(void) { int ret; - ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); + ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); if (ret != 0) { printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); /* Pretty fatal; 64-bit userspace has no other @@ -940,7 +940,7 @@ void xen_enable_syscall(void) if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { ret = register_callback(CALLBACKTYPE_syscall32, - xen_syscall32_target); + xen_entry_SYSCALL_compat); if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 011ec649f388..e3031afcb103 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -44,7 +45,7 @@ SYM_FUNC_START(xen_irq_enable_direct) call check_events 1: FRAME_END - ret + RET SYM_FUNC_END(xen_irq_enable_direct) @@ -54,7 +55,7 @@ SYM_FUNC_END(xen_irq_enable_direct) */ SYM_FUNC_START(xen_irq_disable_direct) movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - ret + RET SYM_FUNC_END(xen_irq_disable_direct) /* @@ -70,7 +71,7 @@ SYM_FUNC_START(xen_save_fl_direct) testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah addb %ah, %ah - ret + RET SYM_FUNC_END(xen_save_fl_direct) @@ -97,7 +98,7 @@ SYM_FUNC_START(xen_restore_fl_direct) call check_events 1: FRAME_END - ret + RET SYM_FUNC_END(xen_restore_fl_direct) @@ -127,7 +128,7 @@ SYM_FUNC_START(check_events) pop %rcx pop %rax FRAME_END - ret + RET SYM_FUNC_END(check_events) SYM_FUNC_START(xen_read_cr2) @@ -135,18 +136,19 @@ SYM_FUNC_START(xen_read_cr2) _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX FRAME_END - ret + RET SYM_FUNC_END(xen_read_cr2); SYM_FUNC_START(xen_read_cr2_direct) FRAME_BEGIN _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX FRAME_END - ret + RET SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) + UNWIND_HINT_ENTRY pop %rcx pop %r11 jmp \name @@ -186,6 +188,7 @@ xen_pv_trap asm_exc_xen_hypervisor_callback SYM_CODE_START(xen_early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS + UNWIND_HINT_EMPTY pop %rcx pop %r11 jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE @@ -212,11 +215,13 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 * rsp->rax } */ SYM_CODE_START(xen_iret) + UNWIND_HINT_EMPTY pushq $0 jmp hypercall_iret SYM_CODE_END(xen_iret) SYM_CODE_START(xen_sysret64) + UNWIND_HINT_EMPTY /* * We're already on the usermode stack at this point, but * still with the kernel gs, so we can easily switch back. @@ -271,7 +276,8 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) */ /* Normal 64-bit system call target */ -SYM_FUNC_START(xen_syscall_target) +SYM_CODE_START(xen_entry_SYSCALL_64) + UNWIND_HINT_ENTRY popq %rcx popq %r11 @@ -284,12 +290,13 @@ SYM_FUNC_START(xen_syscall_target) movq $__USER_CS, 1*8(%rsp) jmp entry_SYSCALL_64_after_hwframe -SYM_FUNC_END(xen_syscall_target) +SYM_CODE_END(xen_entry_SYSCALL_64) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ -SYM_FUNC_START(xen_syscall32_target) +SYM_CODE_START(xen_entry_SYSCALL_compat) + UNWIND_HINT_ENTRY popq %rcx popq %r11 @@ -302,10 +309,11 @@ SYM_FUNC_START(xen_syscall32_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSCALL_compat_after_hwframe -SYM_FUNC_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSCALL_compat) /* 32-bit compat sysenter target */ -SYM_FUNC_START(xen_sysenter_target) +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY /* * NB: Xen is polite and clears TF from EFLAGS for us. This means * that we don't need to guard against single step exceptions here. @@ -322,17 +330,18 @@ SYM_FUNC_START(xen_sysenter_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSENTER_compat_after_hwframe -SYM_FUNC_END(xen_sysenter_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) #else /* !CONFIG_IA32_EMULATION */ -SYM_FUNC_START_ALIAS(xen_syscall32_target) -SYM_FUNC_START(xen_sysenter_target) +SYM_CODE_START(xen_entry_SYSCALL_compat) +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 jmp hypercall_iret -SYM_FUNC_END(xen_sysenter_target) -SYM_FUNC_END_ALIAS(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) +SYM_CODE_END(xen_entry_SYSCALL_compat) #endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 2d7c8f34f56c..2a3ef5fcba34 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -68,8 +68,10 @@ SYM_CODE_END(asm_cpu_bringup_and_idle) .balign PAGE_SIZE SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) - UNWIND_HINT_EMPTY - .skip 32 + UNWIND_HINT_FUNC + ANNOTATE_UNRET_SAFE + ret + .skip 31, 0xcc .endr #define HYPERCALL(n) \ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9546c3384c75..8695809b88f0 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,10 +10,10 @@ /* These are code, but not functions. Defined in entry.S */ extern const char xen_failsafe_callback[]; -void xen_sysenter_target(void); +void xen_entry_SYSENTER_compat(void); #ifdef CONFIG_X86_64 -void xen_syscall_target(void); -void xen_syscall32_target(void); +void xen_entry_SYSCALL_64(void); +void xen_entry_SYSCALL_compat(void); #endif extern void *xen_initial_gdt; diff --git a/arch/xtensa/include/asm/timex.h b/arch/xtensa/include/asm/timex.h index 233ec75e60c6..3f2462f2d027 100644 --- a/arch/xtensa/include/asm/timex.h +++ b/arch/xtensa/include/asm/timex.h @@ -29,10 +29,6 @@ extern unsigned long ccount_freq; -typedef unsigned long long cycles_t; - -#define get_cycles() (0) - void local_timer_setup(unsigned cpu); /* @@ -59,4 +55,6 @@ static inline void set_linux_timer (unsigned long ccompare) xtensa_set_sr(ccompare, SREG_CCOMPARE + LINUX_TIMER); } +#include + #endif /* _XTENSA_TIMEX_H */ diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index bb3f4797d212..db6cdea471d8 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -226,12 +226,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_SINGLESTEP; + set_tsk_thread_flag(child, TIF_SINGLESTEP); } void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_SINGLESTEP; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); } /* diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index 1fb1047f905c..1cb230fafdf2 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -465,7 +465,7 @@ static void do_signal(struct pt_regs *regs) /* Set up the stack frame */ ret = setup_frame(&ksig, sigmask_to_save(), regs); signal_setup_done(ret, &ksig, 0); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; @@ -491,7 +491,7 @@ static void do_signal(struct pt_regs *regs) /* If there's no signal to deliver, we just restore the saved mask. */ restore_saved_sigmask(); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; } diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 77971fe4cc95..8e81ba63ed38 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -154,6 +154,7 @@ static void __init calibrate_ccount(void) cpu = of_find_compatible_node(NULL, NULL, "cdns,xtensa-cpu"); if (cpu) { clk = of_clk_get(cpu, 0); + of_node_put(cpu); if (!IS_ERR(clk)) { ccount_freq = clk_get_rate(clk); return; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 3447556d276d..2b3c829f655b 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -213,12 +213,18 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf, struct simdisk *dev = PDE_DATA(file_inode(file)); const char *s = dev->filename; if (s) { - ssize_t n = simple_read_from_buffer(buf, size, ppos, - s, strlen(s)); - if (n < 0) - return n; - buf += n; - size -= n; + ssize_t len = strlen(s); + char *temp = kmalloc(len + 2, GFP_KERNEL); + + if (!temp) + return -ENOMEM; + + len = scnprintf(temp, len + 2, "%s\n", s); + len = simple_read_from_buffer(buf, size, ppos, + temp, len); + + kfree(temp); + return len; } return simple_read_from_buffer(buf, size, ppos, "\n", 1); } diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 538e6748e85a..c79c1d09ea86 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -133,6 +133,7 @@ static int __init machine_setup(void) if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc"))) update_local_mac(eth); + of_node_put(eth); return 0; } arch_initcall(machine_setup); diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index c2fdd6fcdaee..be6733558b83 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -553,6 +553,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) */ bfqg->bfqd = bfqd; bfqg->active_entities = 0; + bfqg->online = true; bfqg->rq_pos_tree = RB_ROOT; } @@ -581,28 +582,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg, entity->sched_data = &parent->sched_data; } -static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd, - struct blkcg *blkcg) +static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg) { - struct blkcg_gq *blkg; - - blkg = blkg_lookup(blkcg, bfqd->queue); - if (likely(blkg)) - return blkg_to_bfqg(blkg); - return NULL; -} - -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, - struct blkcg *blkcg) -{ - struct bfq_group *bfqg, *parent; + struct bfq_group *parent; struct bfq_entity *entity; - bfqg = bfq_lookup_bfqg(bfqd, blkcg); - - if (unlikely(!bfqg)) - return NULL; - /* * Update chain of bfq_groups as we might be handling a leaf group * which, along with some of its relatives, has not been hooked yet @@ -619,8 +603,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, bfq_group_set_parent(curr_bfqg, parent); } } +} - return bfqg; +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) +{ + struct blkcg_gq *blkg = bio->bi_blkg; + struct bfq_group *bfqg; + + while (blkg) { + bfqg = blkg_to_bfqg(blkg); + if (bfqg->online) { + bio_associate_blkg_from_css(bio, &blkg->blkcg->css); + return bfqg; + } + blkg = blkg->parent; + } + bio_associate_blkg_from_css(bio, + &bfqg_to_blkg(bfqd->root_group)->blkcg->css); + return bfqd->root_group; } /** @@ -696,25 +696,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see * comments in bfq_bic_update_cgroup on this issue) - * - * NOTE: an alternative approach might have been to store the current - * cgroup in bfqq and getting a reference to it, reducing the lookup - * time here, at the price of slightly more complex code. */ -static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, - struct bfq_io_cq *bic, - struct blkcg *blkcg) +static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd, + struct bfq_io_cq *bic, + struct bfq_group *bfqg) { struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); - struct bfq_group *bfqg; struct bfq_entity *entity; - bfqg = bfq_find_set_group(bfqd, blkcg); - - if (unlikely(!bfqg)) - bfqg = bfqd->root_group; - if (async_bfqq) { entity = &async_bfqq->entity; @@ -725,9 +715,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, } if (sync_bfqq) { - entity = &sync_bfqq->entity; - if (entity->sched_data != &bfqg->sched_data) - bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { + /* We are the only user of this bfqq, just move it */ + if (sync_bfqq->entity.sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + } else { + struct bfq_queue *bfqq; + + /* + * The queue was merged to a different queue. Check + * that the merge chain still belongs to the same + * cgroup. + */ + for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) + if (bfqq->entity.sched_data != + &bfqg->sched_data) + break; + if (bfqq) { + /* + * Some queue changed cgroup so the merge is + * not valid anymore. We cannot easily just + * cancel the merge (by clearing new_bfqq) as + * there may be other processes using this + * queue and holding refs to all queues below + * sync_bfqq->new_bfqq. Similarly if the merge + * already happened, we need to detach from + * bfqq now so that we cannot merge bio to a + * request from the old cgroup. + */ + bfq_put_cooperator(sync_bfqq); + bfq_release_process_ref(bfqd, sync_bfqq); + bic_set_bfqq(bic, NULL, 1); + } + } } return bfqg; @@ -736,20 +756,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); - struct bfq_group *bfqg = NULL; + struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio); uint64_t serial_nr; - rcu_read_lock(); - serial_nr = __bio_blkcg(bio)->css.serial_nr; + serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger * spuriously on a newly created cic but there's no harm. */ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) - goto out; + return; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); + /* + * New cgroup for this process. Make sure it is linked to bfq internal + * cgroup hierarchy. + */ + bfq_link_bfqg(bfqd, bfqg); + __bfq_bic_change_cgroup(bfqd, bic, bfqg); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following @@ -802,8 +826,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) */ blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path)); bic->blkcg_serial_nr = serial_nr; -out: - rcu_read_unlock(); } /** @@ -931,6 +953,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) put_async_queues: bfq_put_async_queues(bfqd, bfqg); + bfqg->online = false; spin_unlock_irqrestore(&bfqd->lock, flags); /* @@ -1420,7 +1443,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd) bfq_end_wr_async_queues(bfqd, bfqd->root_group); } -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg) +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { return bfqd->root_group; } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 4edec39ba1ba..c65c37f9e33d 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2227,10 +2227,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, spin_lock_irq(&bfqd->lock); - if (bic) + if (bic) { + /* + * Make sure cgroup info is uptodate for current process before + * considering the merge. + */ + bfq_bic_update_cgroup(bic, bio); + bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf)); - else + } else { bfqd->bio_bfqq = NULL; + } bfqd->bio_bic = bic; ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); @@ -2260,8 +2267,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, return ELEVATOR_NO_MERGE; } -static struct bfq_queue *bfq_init_rq(struct request *rq); - static void bfq_request_merged(struct request_queue *q, struct request *req, enum elv_merge type) { @@ -2270,7 +2275,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, blk_rq_pos(req) < blk_rq_pos(container_of(rb_prev(&req->rb_node), struct request, rb_node))) { - struct bfq_queue *bfqq = bfq_init_rq(req); + struct bfq_queue *bfqq = RQ_BFQQ(req); struct bfq_data *bfqd; struct request *prev, *next_rq; @@ -2322,8 +2327,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, static void bfq_requests_merged(struct request_queue *q, struct request *rq, struct request *next) { - struct bfq_queue *bfqq = bfq_init_rq(rq), - *next_bfqq = bfq_init_rq(next); + struct bfq_queue *bfqq = RQ_BFQQ(rq), + *next_bfqq = RQ_BFQQ(next); if (!bfqq) return; @@ -2502,6 +2507,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) if (process_refs == 0 || new_process_refs == 0) return NULL; + /* + * Make sure merged queues belong to the same parent. Parents could + * have changed since the time we decided the two queues are suitable + * for merging. + */ + if (new_bfqq->entity.parent != bfqq->entity.parent) + return NULL; + bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", new_bfqq->pid); @@ -4914,7 +4927,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) bfqg_and_blkg_put(bfqg); } -static void bfq_put_cooperator(struct bfq_queue *bfqq) +void bfq_put_cooperator(struct bfq_queue *bfqq) { struct bfq_queue *__bfqq, *next; @@ -5146,14 +5159,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, struct bfq_queue *bfqq; struct bfq_group *bfqg; - rcu_read_lock(); - - bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); - if (!bfqg) { - bfqq = &bfqd->oom_bfqq; - goto out; - } - + bfqg = bfq_bio_bfqg(bfqd, bio); if (!is_sync) { async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, ioprio); @@ -5197,7 +5203,6 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, out: bfqq->ref++; /* get a process reference to this queue */ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); - rcu_read_unlock(); return bfqq; } @@ -5500,6 +5505,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q, unsigned int cmd_flags) {} #endif /* CONFIG_BFQ_CGROUP_DEBUG */ +static struct bfq_queue *bfq_init_rq(struct request *rq); + static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head) { @@ -5514,17 +5521,14 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bfqg_stats_update_legacy_io(q, rq); #endif spin_lock_irq(&bfqd->lock); + bfqq = bfq_init_rq(rq); if (blk_mq_sched_try_insert_merge(q, rq)) { spin_unlock_irq(&bfqd->lock); return; } - spin_unlock_irq(&bfqd->lock); - blk_mq_sched_request_inserted(rq); - spin_lock_irq(&bfqd->lock); - bfqq = bfq_init_rq(rq); if (!bfqq || at_head || blk_rq_is_passthrough(rq)) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 703895224562..2a4a6f44efff 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -901,6 +901,8 @@ struct bfq_group { /* reference counter (see comments in bfq_bic_update_cgroup) */ int ref; + /* Is bfq_group still online? */ + bool online; struct bfq_entity entity; struct bfq_sched_data sched_data; @@ -954,6 +956,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool compensate, enum bfqq_expiration reason); void bfq_put_queue(struct bfq_queue *bfqq); +void bfq_put_cooperator(struct bfq_queue *bfqq); void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_schedule_dispatch(struct bfq_data *bfqd); @@ -981,8 +984,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg); void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio); void bfq_end_wr_async(struct bfq_data *bfqd); -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, - struct blkcg *blkcg); +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio); struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 22fb1beed49a..39c94ee6c4a5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1898,12 +1898,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg); */ void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_blkg) { - if (dst->bi_blkg) - blkg_put(dst->bi_blkg); - blkg_get(src->bi_blkg); - dst->bi_blkg = src->bi_blkg; - } + if (src->bi_blkg) + bio_associate_blkg_from_css(dst, &bio_blkcg(src)->css); } EXPORT_SYMBOL_GPL(bio_clone_blkg_association); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index d8b0d8bd132b..74511a060d59 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -86,7 +86,17 @@ struct iolatency_grp; struct blk_iolatency { struct rq_qos rqos; struct timer_list timer; - atomic_t enabled; + + /* + * ->enabled is the master enable switch gating the throttling logic and + * inflight tracking. The number of cgroups which have iolat enabled is + * tracked in ->enable_cnt, and ->enable is flipped on/off accordingly + * from ->enable_work with the request_queue frozen. For details, See + * blkiolatency_enable_work_fn(). + */ + bool enabled; + atomic_t enable_cnt; + struct work_struct enable_work; }; static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) @@ -94,11 +104,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) return container_of(rqos, struct blk_iolatency, rqos); } -static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat) -{ - return atomic_read(&blkiolat->enabled) > 0; -} - struct child_latency_info { spinlock_t lock; @@ -463,7 +468,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) struct blkcg_gq *blkg = bio->bi_blkg; bool issue_as_root = bio_issue_as_root_blkg(bio); - if (!blk_iolatency_enabled(blkiolat)) + if (!blkiolat->enabled) return; while (blkg && blkg->parent) { @@ -593,7 +598,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) u64 window_start; u64 now; bool issue_as_root = bio_issue_as_root_blkg(bio); - bool enabled = false; int inflight = 0; blkg = bio->bi_blkg; @@ -604,8 +608,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) if (!iolat) return; - enabled = blk_iolatency_enabled(iolat->blkiolat); - if (!enabled) + if (!iolat->blkiolat->enabled) return; now = ktime_to_ns(ktime_get()); @@ -644,6 +647,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos) struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); del_timer_sync(&blkiolat->timer); + flush_work(&blkiolat->enable_work); blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency); kfree(blkiolat); } @@ -715,6 +719,44 @@ static void blkiolatency_timer_fn(struct timer_list *t) rcu_read_unlock(); } +/** + * blkiolatency_enable_work_fn - Enable or disable iolatency on the device + * @work: enable_work of the blk_iolatency of interest + * + * iolatency needs to keep track of the number of in-flight IOs per cgroup. This + * is relatively expensive as it involves walking up the hierarchy twice for + * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we + * want to disable the in-flight tracking. + * + * We have to make sure that the counting is balanced - we don't want to leak + * the in-flight counts by disabling accounting in the completion path while IOs + * are in flight. This is achieved by ensuring that no IO is in flight by + * freezing the queue while flipping ->enabled. As this requires a sleepable + * context, ->enabled flipping is punted to this work function. + */ +static void blkiolatency_enable_work_fn(struct work_struct *work) +{ + struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency, + enable_work); + bool enabled; + + /* + * There can only be one instance of this function running for @blkiolat + * and it's guaranteed to be executed at least once after the latest + * ->enabled_cnt modification. Acting on the latest ->enable_cnt is + * sufficient. + * + * Also, we know @blkiolat is safe to access as ->enable_work is flushed + * in blkcg_iolatency_exit(). + */ + enabled = atomic_read(&blkiolat->enable_cnt); + if (enabled != blkiolat->enabled) { + blk_mq_freeze_queue(blkiolat->rqos.q); + blkiolat->enabled = enabled; + blk_mq_unfreeze_queue(blkiolat->rqos.q); + } +} + int blk_iolatency_init(struct request_queue *q) { struct blk_iolatency *blkiolat; @@ -740,17 +782,15 @@ int blk_iolatency_init(struct request_queue *q) } timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0); + INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn); return 0; } -/* - * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise - * return 0. - */ -static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) +static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) { struct iolatency_grp *iolat = blkg_to_lat(blkg); + struct blk_iolatency *blkiolat = iolat->blkiolat; u64 oldval = iolat->min_lat_nsec; iolat->min_lat_nsec = val; @@ -758,13 +798,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, BLKIOLATENCY_MAX_WIN_SIZE); - if (!oldval && val) - return 1; + if (!oldval && val) { + if (atomic_inc_return(&blkiolat->enable_cnt) == 1) + schedule_work(&blkiolat->enable_work); + } if (oldval && !val) { blkcg_clear_delay(blkg); - return -1; + if (atomic_dec_return(&blkiolat->enable_cnt) == 0) + schedule_work(&blkiolat->enable_work); } - return 0; } static void iolatency_clear_scaling(struct blkcg_gq *blkg) @@ -796,7 +838,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, u64 lat_val = 0; u64 oldval; int ret; - int enable = 0; ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); if (ret) @@ -831,41 +872,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg = ctx.blkg; oldval = iolat->min_lat_nsec; - enable = iolatency_set_min_lat_nsec(blkg, lat_val); - if (enable) { - if (!blk_get_queue(blkg->q)) { - ret = -ENODEV; - goto out; - } - - blkg_get(blkg); - } - - if (oldval != iolat->min_lat_nsec) { + iolatency_set_min_lat_nsec(blkg, lat_val); + if (oldval != iolat->min_lat_nsec) iolatency_clear_scaling(blkg); - } - ret = 0; out: blkg_conf_finish(&ctx); - if (ret == 0 && enable) { - struct iolatency_grp *tmp = blkg_to_lat(blkg); - struct blk_iolatency *blkiolat = tmp->blkiolat; - - blk_mq_freeze_queue(blkg->q); - - if (enable == 1) - atomic_inc(&blkiolat->enabled); - else if (enable == -1) - atomic_dec(&blkiolat->enabled); - else - WARN_ON_ONCE(1); - - blk_mq_unfreeze_queue(blkg->q); - - blkg_put(blkg); - blk_put_queue(blkg->q); - } return ret ?: nbytes; } @@ -1006,14 +1018,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); - struct blk_iolatency *blkiolat = iolat->blkiolat; - int ret; - ret = iolatency_set_min_lat_nsec(blkg, 0); - if (ret == 1) - atomic_inc(&blkiolat->enabled); - if (ret == -1) - atomic_dec(&blkiolat->enabled); + iolatency_set_min_lat_nsec(blkg, 0); iolatency_clear_scaling(blkg); } diff --git a/block/blk-mq.c b/block/blk-mq.c index db5146aec24e..5368379c96d2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -470,6 +470,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, if (!blk_mq_hw_queue_mapped(data.hctx)) goto out_queue_exit; cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask); + if (cpu >= nr_cpu_ids) + goto out_queue_exit; data.ctx = __blk_mq_get_ctx(q, cpu); if (!q->elevator) diff --git a/certs/blacklist_hashes.c b/certs/blacklist_hashes.c index 344892337be0..d5961aa3d338 100644 --- a/certs/blacklist_hashes.c +++ b/certs/blacklist_hashes.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "blacklist.h" -const char __initdata *const blacklist_hashes[] = { +const char __initconst *const blacklist_hashes[] = { #include CONFIG_SYSTEM_BLACKLIST_HASH_LIST , NULL }; diff --git a/crypto/Kconfig b/crypto/Kconfig index a86d17ebe6fa..b604dd6a8faf 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -15,6 +15,7 @@ source "crypto/async_tx/Kconfig" # menuconfig CRYPTO tristate "Cryptographic API" + select LIB_MEMNEQ help This option provides the core Cryptographic API. @@ -1966,7 +1967,6 @@ config CRYPTO_STATS config CRYPTO_HASH_INFO bool -source "lib/crypto/Kconfig" source "drivers/crypto/Kconfig" source "crypto/asymmetric_keys/Kconfig" source "certs/Kconfig" diff --git a/crypto/Makefile b/crypto/Makefile index f8a3677d2eec..2785e5fab9e7 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -4,7 +4,7 @@ # obj-$(CONFIG_CRYPTO) += crypto.o -crypto-y := api.o cipher.o compress.o memneq.o +crypto-y := api.o cipher.o compress.o obj-$(CONFIG_CRYPTO_ENGINE) += crypto_engine.o obj-$(CONFIG_CRYPTO_FIPS) += fips.o @@ -211,7 +211,7 @@ UBSAN_SANITIZE_jitterentropy-fips.o = n # module that is in scope for FIPS 140-2 certification # crypto-fips-objs := drbg.o ecb.o cbc.o ctr.o cts.o gcm.o xts.o hmac.o cmac.o \ - memneq.o gf128mul.o aes_generic.o lib-crypto-aes.o \ + gf128mul.o aes_generic.o lib-crypto-aes.o \ jitterentropy.o jitterentropy-kcapi.o \ sha1_generic.o sha256_generic.o sha512_generic.o \ lib-sha1.o lib-crypto-sha256.o diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c index 72fe480f9bd6..5f96a21f8788 100644 --- a/crypto/blake2s_generic.c +++ b/crypto/blake2s_generic.c @@ -15,12 +15,12 @@ static int crypto_blake2s_update_generic(struct shash_desc *desc, const u8 *in, unsigned int inlen) { - return crypto_blake2s_update(desc, in, inlen, blake2s_compress_generic); + return crypto_blake2s_update(desc, in, inlen, true); } static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out) { - return crypto_blake2s_final(desc, out, blake2s_compress_generic); + return crypto_blake2s_final(desc, out, true); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ diff --git a/crypto/cryptd.c b/crypto/cryptd.c index a1bea0f4baa8..668095eca0fa 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -39,6 +39,10 @@ struct cryptd_cpu_queue { }; struct cryptd_queue { + /* + * Protected by disabling BH to allow enqueueing from softinterrupt and + * dequeuing from kworker (cryptd_queue_worker()). + */ struct cryptd_cpu_queue __percpu *cpu_queue; }; @@ -125,28 +129,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue) static int cryptd_enqueue_request(struct cryptd_queue *queue, struct crypto_async_request *request) { - int cpu, err; + int err; struct cryptd_cpu_queue *cpu_queue; refcount_t *refcnt; - cpu = get_cpu(); + local_bh_disable(); cpu_queue = this_cpu_ptr(queue->cpu_queue); err = crypto_enqueue_request(&cpu_queue->queue, request); refcnt = crypto_tfm_ctx(request->tfm); if (err == -ENOSPC) - goto out_put_cpu; + goto out; - queue_work_on(cpu, cryptd_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work); if (!refcount_read(refcnt)) - goto out_put_cpu; + goto out; refcount_inc(refcnt); -out_put_cpu: - put_cpu(); +out: + local_bh_enable(); return err; } @@ -162,15 +166,10 @@ static void cryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct cryptd_cpu_queue, work); /* * Only handle one request at a time to avoid hogging crypto workqueue. - * preempt_disable/enable is used to prevent being preempted by - * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent - * cryptd_enqueue_request() being accessed from software interrupts. */ local_bh_disable(); - preempt_disable(); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); local_bh_enable(); if (!req) diff --git a/crypto/drbg.c b/crypto/drbg.c index 1b4587e0ddad..c769c4476abb 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1036,17 +1036,38 @@ static const struct drbg_state_ops drbg_hash_ops = { ******************************************************************/ static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed, - int reseed) + int reseed, enum drbg_seed_state new_seed_state) { int ret = drbg->d_ops->update(drbg, seed, reseed); if (ret) return ret; - drbg->seeded = true; + drbg->seeded = new_seed_state; /* 10.1.1.2 / 10.1.1.3 step 5 */ drbg->reseed_ctr = 1; + switch (drbg->seeded) { + case DRBG_SEED_STATE_UNSEEDED: + /* Impossible, but handle it to silence compiler warnings. */ + fallthrough; + case DRBG_SEED_STATE_PARTIAL: + /* + * Require frequent reseeds until the seed source is + * fully initialized. + */ + drbg->reseed_threshold = 50; + break; + + case DRBG_SEED_STATE_FULL: + /* + * Seed source has become fully initialized, frequent + * reseeds no longer required. + */ + drbg->reseed_threshold = drbg_max_requests(drbg); + break; + } + return ret; } @@ -1066,12 +1087,10 @@ static inline int drbg_get_random_bytes(struct drbg_state *drbg, return 0; } -static void drbg_async_seed(struct work_struct *work) +static int drbg_seed_from_random(struct drbg_state *drbg) { struct drbg_string data; LIST_HEAD(seedlist); - struct drbg_state *drbg = container_of(work, struct drbg_state, - seed_work); unsigned int entropylen = drbg_sec_strength(drbg->core->flags); unsigned char entropy[32]; int ret; @@ -1082,26 +1101,15 @@ static void drbg_async_seed(struct work_struct *work) drbg_string_fill(&data, entropy, entropylen); list_add_tail(&data.list, &seedlist); - mutex_lock(&drbg->drbg_mutex); - ret = drbg_get_random_bytes(drbg, entropy, entropylen); if (ret) - goto unlock; + goto out; - /* Set seeded to false so that if __drbg_seed fails the - * next generate call will trigger a reseed. - */ - drbg->seeded = false; - - __drbg_seed(drbg, &seedlist, true); - - if (drbg->seeded) - drbg->reseed_threshold = drbg_max_requests(drbg); - -unlock: - mutex_unlock(&drbg->drbg_mutex); + ret = __drbg_seed(drbg, &seedlist, true, DRBG_SEED_STATE_FULL); +out: memzero_explicit(entropy, entropylen); + return ret; } /* @@ -1123,6 +1131,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, unsigned int entropylen = drbg_sec_strength(drbg->core->flags); struct drbg_string data1; LIST_HEAD(seedlist); + enum drbg_seed_state new_seed_state = DRBG_SEED_STATE_FULL; /* 9.1 / 9.2 / 9.3.1 step 3 */ if (pers && pers->len > (drbg_max_addtl(drbg))) { @@ -1150,6 +1159,9 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, BUG_ON((entropylen * 2) > sizeof(entropy)); /* Get seed from in-kernel /dev/urandom */ + if (!rng_is_initialized()) + new_seed_state = DRBG_SEED_STATE_PARTIAL; + ret = drbg_get_random_bytes(drbg, entropy, entropylen); if (ret) goto out; @@ -1206,7 +1218,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, memset(drbg->C, 0, drbg_statelen(drbg)); } - ret = __drbg_seed(drbg, &seedlist, reseed); + ret = __drbg_seed(drbg, &seedlist, reseed, new_seed_state); out: memzero_explicit(entropy, entropylen * 2); @@ -1386,19 +1398,25 @@ static int drbg_generate(struct drbg_state *drbg, * here. The spec is a bit convoluted here, we make it simpler. */ if (drbg->reseed_threshold < drbg->reseed_ctr) - drbg->seeded = false; + drbg->seeded = DRBG_SEED_STATE_UNSEEDED; - if (drbg->pr || !drbg->seeded) { + if (drbg->pr || drbg->seeded == DRBG_SEED_STATE_UNSEEDED) { pr_devel("DRBG: reseeding before generation (prediction " "resistance: %s, state %s)\n", drbg->pr ? "true" : "false", - drbg->seeded ? "seeded" : "unseeded"); + (drbg->seeded == DRBG_SEED_STATE_FULL ? + "seeded" : "unseeded")); /* 9.3.1 steps 7.1 through 7.3 */ len = drbg_seed(drbg, addtl, true); if (len) goto err; /* 9.3.1 step 7.4 */ addtl = NULL; + } else if (rng_is_initialized() && + drbg->seeded == DRBG_SEED_STATE_PARTIAL) { + len = drbg_seed_from_random(drbg); + if (len) + goto err; } if (addtl && 0 < addtl->len) @@ -1491,51 +1509,15 @@ static int drbg_generate_long(struct drbg_state *drbg, return 0; } -static void drbg_schedule_async_seed(struct random_ready_callback *rdy) -{ - struct drbg_state *drbg = container_of(rdy, struct drbg_state, - random_ready); - - schedule_work(&drbg->seed_work); -} - static int drbg_prepare_hrng(struct drbg_state *drbg) { - int err; - /* We do not need an HRNG in test mode. */ if (list_empty(&drbg->test_data.list)) return 0; drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0); - INIT_WORK(&drbg->seed_work, drbg_async_seed); - - drbg->random_ready.owner = THIS_MODULE; - drbg->random_ready.func = drbg_schedule_async_seed; - - err = add_random_ready_callback(&drbg->random_ready); - - switch (err) { - case 0: - break; - - case -EALREADY: - err = 0; - fallthrough; - - default: - drbg->random_ready.func = NULL; - return err; - } - - /* - * Require frequent reseeds until the seed source is fully - * initialized. - */ - drbg->reseed_threshold = 50; - - return err; + return 0; } /* @@ -1578,7 +1560,7 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, if (!drbg->core) { drbg->core = &drbg_cores[coreref]; drbg->pr = pr; - drbg->seeded = false; + drbg->seeded = DRBG_SEED_STATE_UNSEEDED; drbg->reseed_threshold = drbg_max_requests(drbg); ret = drbg_alloc_state(drbg); @@ -1629,11 +1611,6 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, */ static int drbg_uninstantiate(struct drbg_state *drbg) { - if (drbg->random_ready.func) { - del_random_ready_callback(&drbg->random_ready); - cancel_work_sync(&drbg->seed_work); - } - if (!IS_ERR_OR_NULL(drbg->jent)) crypto_free_rng(drbg->jent); drbg->jent = NULL; diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index 6a3fd09057d0..f7ed43020672 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -113,15 +113,15 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 1: verify that 0 < r < q, 0 < s < q */ if (vli_is_zero(r, ndigits) || - vli_cmp(r, ctx->curve->n, ndigits) == 1 || + vli_cmp(r, ctx->curve->n, ndigits) >= 0 || vli_is_zero(s, ndigits) || - vli_cmp(s, ctx->curve->n, ndigits) == 1) + vli_cmp(s, ctx->curve->n, ndigits) >= 0) return -EKEYREJECTED; /* Step 2: calculate hash (h) of the message (passed as input) */ /* Step 3: calculate e = h \mod q */ vli_from_le64(e, digest, ndigits); - if (vli_cmp(e, ctx->curve->n, ndigits) == 1) + if (vli_cmp(e, ctx->curve->n, ndigits) >= 0) vli_sub(e, e, ctx->curve->n, ndigits); if (vli_is_zero(e, ndigits)) e[0] = 1; @@ -137,7 +137,7 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */ ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key, ctx->curve); - if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1) + if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0) vli_sub(cc.x, cc.x, ctx->curve->n, ndigits); /* Step 7: if R == r signature is valid */ diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c index 6284aff434a1..da3a24557a03 100644 --- a/drivers/accessibility/speakup/spk_ttyio.c +++ b/drivers/accessibility/speakup/spk_ttyio.c @@ -88,7 +88,7 @@ static int spk_ttyio_receive_buf2(struct tty_struct *tty, } if (!ldisc_data->buf_free) - /* ttyio_in will tty_schedule_flip */ + /* ttyio_in will tty_flip_buffer_push */ return 0; /* Make sure the consumer has read buf before we have seen @@ -334,7 +334,7 @@ static unsigned char ttyio_in(int timeout) mb(); ldisc_data->buf_free = true; /* Let TTY push more characters */ - tty_schedule_flip(speakup_tty->port); + tty_flip_buffer_push(speakup_tty->port); return rv; } diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index 598fd19b65fa..45973aa6e06d 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -29,16 +29,26 @@ #undef pr_fmt #define pr_fmt(fmt) "BERT: " fmt + +#define ACPI_BERT_PRINT_MAX_RECORDS 5 #define ACPI_BERT_PRINT_MAX_LEN 1024 static int bert_disable; +/* + * Print "all" the error records in the BERT table, but avoid huge spam to + * the console if the BIOS included oversize records, or too many records. + * Skipping some records here does not lose anything because the full + * data is available to user tools in: + * /sys/firmware/acpi/tables/data/BERT + */ static void __init bert_print_all(struct acpi_bert_region *region, unsigned int region_len) { struct acpi_hest_generic_status *estatus = (struct acpi_hest_generic_status *)region; int remain = region_len; + int printed = 0, skipped = 0; u32 estatus_len; while (remain >= sizeof(struct acpi_bert_region)) { @@ -46,24 +56,26 @@ static void __init bert_print_all(struct acpi_bert_region *region, if (remain < estatus_len) { pr_err(FW_BUG "Truncated status block (length: %u).\n", estatus_len); - return; + break; } /* No more error records. */ if (!estatus->block_status) - return; + break; if (cper_estatus_check(estatus)) { pr_err(FW_BUG "Invalid error record.\n"); - return; + break; } - pr_info_once("Error records from previous boot:\n"); - if (region_len < ACPI_BERT_PRINT_MAX_LEN) + if (estatus_len < ACPI_BERT_PRINT_MAX_LEN && + printed < ACPI_BERT_PRINT_MAX_RECORDS) { + pr_info_once("Error records from previous boot:\n"); cper_estatus_print(KERN_INFO HW_ERR, estatus); - else - pr_info_once("Max print length exceeded, table data is available at:\n" - "/sys/firmware/acpi/tables/data/BERT"); + printed++; + } else { + skipped++; + } /* * Because the boot error source is "one-time polled" type, @@ -75,6 +87,9 @@ static void __init bert_print_all(struct acpi_bert_region *region, estatus = (void *)estatus + estatus_len; remain -= estatus_len; } + + if (skipped) + pr_info(HW_ERR "Skipped %d error records\n", skipped); } static int __init setup_bert_disable(char *str) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 35ecb57514ca..1f57c6838b86 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -433,6 +433,16 @@ void acpi_init_properties(struct acpi_device *adev) acpi_extract_apple_properties(adev); } +static void acpi_free_device_properties(struct list_head *list) +{ + struct acpi_device_properties *props, *tmp; + + list_for_each_entry_safe(props, tmp, list, list) { + list_del(&props->list); + kfree(props); + } +} + static void acpi_destroy_nondev_subnodes(struct list_head *list) { struct acpi_data_node *dn, *next; @@ -445,22 +455,18 @@ static void acpi_destroy_nondev_subnodes(struct list_head *list) wait_for_completion(&dn->kobj_done); list_del(&dn->sibling); ACPI_FREE((void *)dn->data.pointer); + acpi_free_device_properties(&dn->data.properties); kfree(dn); } } void acpi_free_properties(struct acpi_device *adev) { - struct acpi_device_properties *props, *tmp; - acpi_destroy_nondev_subnodes(&adev->data.subnodes); ACPI_FREE((void *)adev->data.pointer); adev->data.of_compatible = NULL; adev->data.pointer = NULL; - list_for_each_entry_safe(props, tmp, &adev->data.properties, list) { - list_del(&props->list); - kfree(props); - } + acpi_free_device_properties(&adev->data.properties); } /** diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 503935b1deeb..cfda5720de02 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -377,6 +377,18 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"), }, }, + /* + * ASUS B1400CEAE hangs on resume from suspend (see + * https://bugzilla.kernel.org/show_bug.cgi?id=215742). + */ + { + .callback = init_default_s3, + .ident = "ASUS B1400CEAE", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK B1400CEAE"), + }, + }, {}, }; diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index a5cc4f3bb1e3..1d94c4625f36 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -439,18 +439,29 @@ static ssize_t acpi_data_show(struct file *filp, struct kobject *kobj, { struct acpi_data_attr *data_attr; void __iomem *base; - ssize_t rc; + ssize_t size; data_attr = container_of(bin_attr, struct acpi_data_attr, attr); + size = data_attr->attr.size; - base = acpi_os_map_memory(data_attr->addr, data_attr->attr.size); + if (offset < 0) + return -EINVAL; + + if (offset >= size) + return 0; + + if (count > size - offset) + count = size - offset; + + base = acpi_os_map_iomem(data_attr->addr, size); if (!base) return -ENOMEM; - rc = memory_read_from_buffer(buf, count, &offset, base, - data_attr->attr.size); - acpi_os_unmap_memory(base, data_attr->attr.size); - return rc; + memcpy_fromio(buf, base + offset, count); + + acpi_os_unmap_iomem(base, size); + + return count; } static int acpi_bert_data_init(void *th, struct acpi_data_attr *data_attr) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 7b9793cb55c5..e39d59ad6496 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -424,23 +424,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = { .callback = video_detect_force_native, .ident = "Clevo NL5xRU", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), - DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), - }, - }, - { - .callback = video_detect_force_native, - .ident = "Clevo NL5xRU", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), - DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), - }, - }, - { - .callback = video_detect_force_native, - .ident = "Clevo NL5xRU", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), }, }, @@ -463,28 +446,60 @@ static const struct dmi_system_id video_detect_dmi_table[] = { { .callback = video_detect_force_native, .ident = "Clevo NL5xNU", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + }, + }, + /* + * The TongFang PF5PU1G, PF4NU1F, PF5NU1G, and PF5LUXG/TUXEDO BA15 Gen10, + * Pulse 14/15 Gen1, and Pulse 15 Gen2 have the same problem as the Clevo + * NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2. See the description + * above. + */ + { + .callback = video_detect_force_native, + .ident = "TongFang PF5PU1G", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF5PU1G"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF4NU1F", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF4NU1F"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF4NU1F", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), - DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + DMI_MATCH(DMI_BOARD_NAME, "PULSE1401"), }, }, { .callback = video_detect_force_native, - .ident = "Clevo NL5xNU", + .ident = "TongFang PF5NU1G", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), - DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + DMI_MATCH(DMI_BOARD_NAME, "PF5NU1G"), }, }, { .callback = video_detect_force_native, - .ident = "Clevo NL5xNU", + .ident = "TongFang PF5NU1G", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), - DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "PULSE1501"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "TongFang PF5LUXG", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PF5LUXG"), }, }, - /* * Desktops which falsely report a backlight and which our heuristics * for this do not catch. diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index f963a0a7da46..2402fa4d8aa5 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5475,7 +5475,7 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports) { - const struct ata_port_info *pi; + const struct ata_port_info *pi = &ata_dummy_port_info; struct ata_host *host; int i, j; @@ -5483,7 +5483,7 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev, if (!host) return NULL; - for (i = 0, j = 0, pi = NULL; i < host->n_ports; i++) { + for (i = 0, j = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; if (ppi[j]) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 6a40e3c6cf49..b33772df9bc6 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -196,7 +196,7 @@ static struct { { XFER_PIO_0, "XFER_PIO_0" }, { XFER_PIO_SLOW, "XFER_PIO_SLOW" } }; -ata_bitfield_name_match(xfer,ata_xfer_names) +ata_bitfield_name_search(xfer, ata_xfer_names) /* * ATA Port attributes diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index b5a3f710d76d..4cc8a1027888 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -888,12 +888,14 @@ static int octeon_cf_probe(struct platform_device *pdev) int i; res_dma = platform_get_resource(dma_dev, IORESOURCE_MEM, 0); if (!res_dma) { + put_device(&dma_dev->dev); of_node_put(dma_node); return -EINVAL; } cf_port->dma_base = (u64)devm_ioremap(&pdev->dev, res_dma->start, resource_size(res_dma)); if (!cf_port->dma_base) { + put_device(&dma_dev->dev); of_node_put(dma_node); return -EINVAL; } @@ -903,6 +905,7 @@ static int octeon_cf_probe(struct platform_device *pdev) irq = i; irq_handler = octeon_cf_interrupt; } + put_device(&dma_dev->dev); } of_node_put(dma_node); } diff --git a/drivers/base/bus.c b/drivers/base/bus.c index a9c23ecebc7c..df85e928b97f 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -621,7 +621,7 @@ int bus_add_driver(struct device_driver *drv) if (drv->bus->p->drivers_autoprobe) { error = driver_attach(drv); if (error) - goto out_unregister; + goto out_del_list; } module_add_driver(drv->owner, drv); @@ -648,6 +648,8 @@ int bus_add_driver(struct device_driver *drv) return 0; +out_del_list: + klist_del(&priv->knode_bus); out_unregister: kobject_put(&priv->kobj); /* drv->p is freed in driver_release() */ diff --git a/drivers/base/core.c b/drivers/base/core.c index 4dbe8276f579..23fce12a5486 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -470,7 +470,8 @@ static void device_link_release_fn(struct work_struct *work) /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); - pm_runtime_release_supplier(link, true); + pm_runtime_release_supplier(link); + pm_request_idle(link->supplier); put_device(link->consumer); put_device(link->supplier); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 8f1d6569564c..24dd532c8e5e 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -566,6 +566,18 @@ ssize_t __weak cpu_show_srbds(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -575,6 +587,8 @@ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); +static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); +static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -586,6 +600,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_tsx_async_abort.attr, &dev_attr_itlb_multihit.attr, &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, + &dev_attr_retbleed.attr, NULL }; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index b1b8f480a752..272087f361e6 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -250,7 +250,6 @@ DEFINE_SHOW_ATTRIBUTE(deferred_devs); int driver_deferred_probe_timeout; EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout); -static DECLARE_WAIT_QUEUE_HEAD(probe_timeout_waitqueue); static int __init deferred_probe_timeout_setup(char *str) { @@ -302,7 +301,6 @@ static void deferred_probe_timeout_work_func(struct work_struct *work) list_for_each_entry(p, &deferred_probe_pending_list, deferred_probe) dev_info(p->device, "deferred probe pending\n"); mutex_unlock(&deferred_probe_mutex); - wake_up_all(&probe_timeout_waitqueue); } static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func); @@ -706,9 +704,6 @@ int driver_probe_done(void) */ void wait_for_device_probe(void) { - /* wait for probe timeout */ - wait_event(probe_timeout_waitqueue, !driver_deferred_probe_timeout); - /* wait for the deferred probe workqueue to finish */ flush_work(&deferred_probe_work); @@ -897,6 +892,7 @@ static void __device_attach_async_helper(void *_dev, async_cookie_t cookie) static int __device_attach(struct device *dev, bool allow_async) { int ret = 0; + bool async = false; device_lock(dev); if (dev->p->dead) { @@ -935,7 +931,7 @@ static int __device_attach(struct device *dev, bool allow_async) */ dev_dbg(dev, "scheduling asynchronous probe\n"); get_device(dev); - async_schedule_dev(__device_attach_async_helper, dev); + async = true; } else { pm_request_idle(dev); } @@ -945,6 +941,8 @@ static int __device_attach(struct device *dev, bool allow_async) } out_unlock: device_unlock(dev); + if (async) + async_schedule_dev(__device_attach_async_helper, dev); return ret; } diff --git a/drivers/base/memory.c b/drivers/base/memory.c index de058d15b33e..49eb14271f28 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -560,10 +560,9 @@ int register_memory(struct memory_block *memory) } ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory, GFP_KERNEL)); - if (ret) { - put_device(&memory->dev); + if (ret) device_unregister(&memory->dev); - } + return ret; } diff --git a/drivers/base/node.c b/drivers/base/node.c index 21965de8538b..5f745c906c33 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -655,6 +655,7 @@ static int register_node(struct node *node, int num) */ void unregister_node(struct node *node) { + compaction_unregister_node(node); hugetlb_unregister_node(node); /* no-op, if memoryless node */ node_remove_accesses(node); node_remove_caches(node); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 157331940488..835a39e84c1d 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -308,13 +308,10 @@ static int rpm_get_suppliers(struct device *dev) /** * pm_runtime_release_supplier - Drop references to device link's supplier. * @link: Target device link. - * @check_idle: Whether or not to check if the supplier device is idle. * - * Drop all runtime PM references associated with @link to its supplier device - * and if @check_idle is set, check if that device is idle (and so it can be - * suspended). + * Drop all runtime PM references associated with @link to its supplier device. */ -void pm_runtime_release_supplier(struct device_link *link, bool check_idle) +void pm_runtime_release_supplier(struct device_link *link) { struct device *supplier = link->supplier; @@ -327,9 +324,6 @@ void pm_runtime_release_supplier(struct device_link *link, bool check_idle) while (refcount_dec_not_one(&link->rpm_active) && atomic_read(&supplier->power.usage_count) > 0) pm_runtime_put_noidle(supplier); - - if (check_idle) - pm_request_idle(supplier); } static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) @@ -337,8 +331,11 @@ static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) - pm_runtime_release_supplier(link, try_to_suspend); + device_links_read_lock_held()) { + pm_runtime_release_supplier(link); + if (try_to_suspend) + pm_request_idle(link->supplier); + } } static void rpm_put_suppliers(struct device *dev) @@ -1776,7 +1773,8 @@ void pm_runtime_drop_link(struct device_link *link) return; pm_runtime_drop_link_count(link->consumer); - pm_runtime_release_supplier(link, true); + pm_runtime_release_supplier(link); + pm_request_idle(link->supplier); } static bool pm_runtime_need_not_resume(struct device *dev) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 87c5c421e0f4..4466f8bdab2e 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -220,6 +220,7 @@ static void regmap_irq_enable(struct irq_data *data) struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); struct regmap *map = d->map; const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); + unsigned int reg = irq_data->reg_offset / map->reg_stride; unsigned int mask, type; type = irq_data->type.type_falling_val | irq_data->type.type_rising_val; @@ -236,14 +237,14 @@ static void regmap_irq_enable(struct irq_data *data) * at the corresponding offset in regmap_irq_set_type(). */ if (d->chip->type_in_mask && type) - mask = d->type_buf[irq_data->reg_offset / map->reg_stride]; + mask = d->type_buf[reg] & irq_data->mask; else mask = irq_data->mask; if (d->chip->clear_on_unmask) d->clear_status = true; - d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~mask; + d->mask_buf[reg] &= ~mask; } static void regmap_irq_disable(struct irq_data *data) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 65b95aef8dbc..407527ff6b1f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -184,7 +184,7 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, unsigned int set_size) { struct drbd_request *r; - struct drbd_request *req = NULL; + struct drbd_request *req = NULL, *tmp = NULL; int expect_epoch = 0; int expect_size = 0; @@ -238,8 +238,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, * to catch requests being barrier-acked "unexpectedly". * It usually should find the same req again, or some READ preceding it. */ list_for_each_entry(req, &connection->transfer_log, tl_requests) - if (req->epoch == expect_epoch) + if (req->epoch == expect_epoch) { + tmp = req; break; + } + req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { if (req->epoch != expect_epoch) break; @@ -3628,9 +3631,8 @@ const char *cmdname(enum drbd_packet cmd) * when we want to support more than * one PRO_VERSION */ static const char *cmdnames[] = { + [P_DATA] = "Data", - [P_WSAME] = "WriteSame", - [P_TRIM] = "Trim", [P_DATA_REPLY] = "DataReply", [P_RS_DATA_REPLY] = "RSDataReply", [P_BARRIER] = "Barrier", @@ -3641,7 +3643,6 @@ const char *cmdname(enum drbd_packet cmd) [P_DATA_REQUEST] = "DataRequest", [P_RS_DATA_REQUEST] = "RSDataRequest", [P_SYNC_PARAM] = "SyncParam", - [P_SYNC_PARAM89] = "SyncParam89", [P_PROTOCOL] = "ReportProtocol", [P_UUIDS] = "ReportUUIDs", [P_SIZES] = "ReportSizes", @@ -3649,6 +3650,7 @@ const char *cmdname(enum drbd_packet cmd) [P_SYNC_UUID] = "ReportSyncUUID", [P_AUTH_CHALLENGE] = "AuthChallenge", [P_AUTH_RESPONSE] = "AuthResponse", + [P_STATE_CHG_REQ] = "StateChgRequest", [P_PING] = "Ping", [P_PING_ACK] = "PingAck", [P_RECV_ACK] = "RecvAck", @@ -3659,24 +3661,26 @@ const char *cmdname(enum drbd_packet cmd) [P_NEG_DREPLY] = "NegDReply", [P_NEG_RS_DREPLY] = "NegRSDReply", [P_BARRIER_ACK] = "BarrierAck", - [P_STATE_CHG_REQ] = "StateChgRequest", [P_STATE_CHG_REPLY] = "StateChgReply", [P_OV_REQUEST] = "OVRequest", [P_OV_REPLY] = "OVReply", [P_OV_RESULT] = "OVResult", [P_CSUM_RS_REQUEST] = "CsumRSRequest", [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", + [P_SYNC_PARAM89] = "SyncParam89", [P_COMPRESSED_BITMAP] = "CBitmap", [P_DELAY_PROBE] = "DelayProbe", [P_OUT_OF_SYNC] = "OutOfSync", - [P_RETRY_WRITE] = "RetryWrite", [P_RS_CANCEL] = "RSCancel", [P_CONN_ST_CHG_REQ] = "conn_st_chg_req", [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply", [P_RETRY_WRITE] = "retry_write", [P_PROTOCOL_UPDATE] = "protocol_update", + [P_TRIM] = "Trim", [P_RS_THIN_REQ] = "rs_thin_req", [P_RS_DEALLOCATED] = "rs_deallocated", + [P_WSAME] = "WriteSame", + [P_ZEROES] = "Zeroes", /* enum drbd_packet, but not commands - obsoleted flags: * P_MAY_IGNORE diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c9411fe2f0af..4ef407a33996 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -509,8 +509,8 @@ static unsigned long fdc_busy; static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); static DECLARE_WAIT_QUEUE_HEAD(command_done); -/* Errors during formatting are counted here. */ -static int format_errors; +/* errors encountered on the current (or last) request */ +static int floppy_errors; /* Format request descriptor. */ static struct format_descr format_req; @@ -530,7 +530,6 @@ static struct format_descr format_req; static char *floppy_track_buffer; static int max_buffer_sectors; -static int *errors; typedef void (*done_f)(int); static const struct cont_t { void (*interrupt)(void); @@ -1455,7 +1454,7 @@ static int interpret_errors(void) if (drive_params[current_drive].flags & FTD_MSG) DPRINT("Over/Underrun - retrying\n"); bad = 0; - } else if (*errors >= drive_params[current_drive].max_errors.reporting) { + } else if (floppy_errors >= drive_params[current_drive].max_errors.reporting) { print_errors(); } if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC) @@ -2095,7 +2094,7 @@ static void bad_flp_intr(void) if (!next_valid_format(current_drive)) return; } - err_count = ++(*errors); + err_count = ++floppy_errors; INFBOUND(write_errors[current_drive].badness, err_count); if (err_count > drive_params[current_drive].max_errors.abort) cont->done(0); @@ -2240,9 +2239,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req) return -EINVAL; } format_req = *tmp_format_req; - format_errors = 0; cont = &format_cont; - errors = &format_errors; + floppy_errors = 0; ret = wait_til_done(redo_format, true); if (ret == -EINTR) return -EINTR; @@ -2721,7 +2719,7 @@ static int make_raw_rw_request(void) */ if (!direct || (indirect * 2 > direct * 3 && - *errors < drive_params[current_drive].max_errors.read_track && + floppy_errors < drive_params[current_drive].max_errors.read_track && ((!probing || (drive_params[current_drive].read_track & (1 << drive_state[current_drive].probed_format)))))) { max_size = blk_rq_sectors(current_req); @@ -2846,10 +2844,11 @@ static int set_next_request(void) current_req = list_first_entry_or_null(&floppy_reqs, struct request, queuelist); if (current_req) { - current_req->error_count = 0; + floppy_errors = 0; list_del_init(¤t_req->queuelist); + return 1; } - return current_req != NULL; + return 0; } /* Starts or continues processing request. Will automatically unlock the @@ -2908,7 +2907,6 @@ static void redo_fd_request(void) _floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format]; } else probing = 0; - errors = &(current_req->error_count); tmp = make_raw_rw_request(); if (tmp < 2) { request_done(tmp); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 59c452fff835..4a6b82d434ee 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -880,11 +880,15 @@ static int wait_for_reconnect(struct nbd_device *nbd) struct nbd_config *config = nbd->config; if (!config->dead_conn_timeout) return 0; - if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags)) + + if (!wait_event_timeout(config->conn_wait, + test_bit(NBD_RT_DISCONNECTED, + &config->runtime_flags) || + atomic_read(&config->live_connections) > 0, + config->dead_conn_timeout)) return 0; - return wait_event_timeout(config->conn_wait, - atomic_read(&config->live_connections) > 0, - config->dead_conn_timeout) > 0; + + return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags); } static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) @@ -1355,7 +1359,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b static void nbd_clear_sock_ioctl(struct nbd_device *nbd, struct block_device *bdev) { - sock_shutdown(nbd); + nbd_clear_sock(nbd); __invalidate_device(bdev, true); nbd_bdev_reset(bdev); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, @@ -1468,15 +1472,20 @@ static struct nbd_config *nbd_alloc_config(void) { struct nbd_config *config; + if (!try_module_get(THIS_MODULE)) + return ERR_PTR(-ENODEV); + config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); - if (!config) - return NULL; + if (!config) { + module_put(THIS_MODULE); + return ERR_PTR(-ENOMEM); + } + atomic_set(&config->recv_threads, 0); init_waitqueue_head(&config->recv_wq); init_waitqueue_head(&config->conn_wait); config->blksize = NBD_DEF_BLKSIZE; atomic_set(&config->live_connections, 0); - try_module_get(THIS_MODULE); return config; } @@ -1503,12 +1512,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) mutex_unlock(&nbd->config_lock); goto out; } - config = nbd->config = nbd_alloc_config(); - if (!config) { - ret = -ENOMEM; + config = nbd_alloc_config(); + if (IS_ERR(config)) { + ret = PTR_ERR(config); mutex_unlock(&nbd->config_lock); goto out; } + nbd->config = config; refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); @@ -1930,13 +1940,14 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) nbd_put(nbd); return -EINVAL; } - config = nbd->config = nbd_alloc_config(); - if (!nbd->config) { + config = nbd_alloc_config(); + if (IS_ERR(config)) { mutex_unlock(&nbd->config_lock); nbd_put(nbd); printk(KERN_ERR "nbd: couldn't allocate config\n"); - return -ENOMEM; + return PTR_ERR(config); } + nbd->config = config; refcount_set(&nbd->config_refs, 1); set_bit(NBD_RT_BOUND, &config->runtime_flags); @@ -2029,6 +2040,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) mutex_lock(&nbd->config_lock); nbd_disconnect(nbd); sock_shutdown(nbd); + wake_up(&nbd->config->conn_wait); /* * Make sure recv thread has finished, so it does not drop the last * config ref and try to destroy the workqueue from inside the work @@ -2456,6 +2468,12 @@ static void __exit nbd_cleanup(void) struct nbd_device *nbd; LIST_HEAD(del_list); + /* + * Unregister netlink interface prior to waiting + * for the completion of netlink commands. + */ + genl_unregister_family(&nbd_genl_family); + nbd_dbg_close(); mutex_lock(&nbd_index_mutex); @@ -2465,13 +2483,15 @@ static void __exit nbd_cleanup(void) while (!list_empty(&del_list)) { nbd = list_first_entry(&del_list, struct nbd_device, list); list_del_init(&nbd->list); + if (refcount_read(&nbd->config_refs)) + printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n", + refcount_read(&nbd->config_refs)); if (refcount_read(&nbd->refs) != 1) printk(KERN_ERR "nbd: possibly leaking a device\n"); nbd_put(nbd); } idr_destroy(&nbd_index_idr); - genl_unregister_family(&nbd_genl_family); unregister_blkdev(NBD_MAJOR, "nbd"); } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 26d25188275a..ac68c845c6cc 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -867,11 +867,12 @@ static int virtblk_probe(struct virtio_device *vdev) blk_queue_io_opt(q, blk_size * opt_io_size); if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { - q->limits.discard_granularity = blk_size; - virtio_cread(vdev, struct virtio_blk_config, discard_sector_alignment, &v); - q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0; + if (v) + q->limits.discard_granularity = v << SECTOR_SHIFT; + else + q->limits.discard_granularity = blk_size; virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 47d4bb23d6f3..abbb68b6d9bd 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -151,6 +151,10 @@ static unsigned int xen_blkif_max_ring_order; module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); +static bool __read_mostly xen_blkif_trusted = true; +module_param_named(trusted, xen_blkif_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define BLK_RING_SIZE(info) \ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) @@ -208,6 +212,7 @@ struct blkfront_info unsigned int feature_discard:1; unsigned int feature_secdiscard:1; unsigned int feature_persistent:1; + unsigned int bounce:1; unsigned int discard_granularity; unsigned int discard_alignment; /* Number of 4KB segments handled */ @@ -310,8 +315,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) if (!gnt_list_entry) goto out_of_memory; - if (info->feature_persistent) { - granted_page = alloc_page(GFP_NOIO); + if (info->bounce) { + granted_page = alloc_page(GFP_NOIO | __GFP_ZERO); if (!granted_page) { kfree(gnt_list_entry); goto out_of_memory; @@ -330,7 +335,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) list_for_each_entry_safe(gnt_list_entry, n, &rinfo->grants, node) { list_del(&gnt_list_entry->node); - if (info->feature_persistent) + if (info->bounce) __free_page(gnt_list_entry->page); kfree(gnt_list_entry); i--; @@ -376,7 +381,7 @@ static struct grant *get_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (info->feature_persistent) + if (info->bounce) grant_foreign_access(gnt_list_entry, info); else { /* Grant access to the GFN passed by the caller */ @@ -400,7 +405,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head, /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (!info->feature_persistent) { + if (!info->bounce) { struct page *indirect_page; /* Fetch a pre-allocated page to use for indirect grefs */ @@ -715,7 +720,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri .grant_idx = 0, .segments = NULL, .rinfo = rinfo, - .need_copy = rq_data_dir(req) && info->feature_persistent, + .need_copy = rq_data_dir(req) && info->bounce, }; /* @@ -1035,11 +1040,12 @@ static void xlvbd_flush(struct blkfront_info *info) { blk_queue_write_cache(info->rq, info->feature_flush ? true : false, info->feature_fua ? true : false); - pr_info("blkfront: %s: %s %s %s %s %s\n", + pr_info("blkfront: %s: %s %s %s %s %s %s %s\n", info->gd->disk_name, flush_info(info), "persistent grants:", info->feature_persistent ? "enabled;" : "disabled;", "indirect descriptors:", - info->max_indirect_segments ? "enabled;" : "disabled;"); + info->max_indirect_segments ? "enabled;" : "disabled;", + "bounce buffer:", info->bounce ? "enabled" : "disabled;"); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -1273,7 +1279,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) if (!list_empty(&rinfo->indirect_pages)) { struct page *indirect_page, *n; - BUG_ON(info->feature_persistent); + BUG_ON(info->bounce); list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { list_del(&indirect_page->lru); __free_page(indirect_page); @@ -1290,7 +1296,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) 0, 0UL); rinfo->persistent_gnts_c--; } - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1311,7 +1317,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) for (j = 0; j < segs; j++) { persistent_gnt = rinfo->shadow[i].grants_used[j]; gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1501,7 +1507,7 @@ static int blkif_completion(unsigned long *id, data.s = s; num_sg = s->num_sg; - if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { + if (bret->operation == BLKIF_OP_READ && info->bounce) { for_each_sg(s->sg, sg, num_sg, i) { BUG_ON(sg->offset + sg->length > PAGE_SIZE); @@ -1560,7 +1566,7 @@ static int blkif_completion(unsigned long *id, * Add the used indirect page back to the list of * available pages for indirect grefs. */ - if (!info->feature_persistent) { + if (!info->bounce) { indirect_page = s->indirect_grants[i]->page; list_add(&indirect_page->lru, &rinfo->indirect_pages); } @@ -1753,7 +1759,7 @@ static int setup_blkring(struct xenbus_device *dev, for (i = 0; i < info->nr_ring_pages; i++) rinfo->ring_ref[i] = GRANT_INVALID_REF; - sring = alloc_pages_exact(ring_size, GFP_NOIO); + sring = alloc_pages_exact(ring_size, GFP_NOIO | __GFP_ZERO); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; @@ -1857,6 +1863,10 @@ static int talk_to_blkback(struct xenbus_device *dev, if (!info) return -ENODEV; + /* Check if backend is trusted. */ + info->bounce = !xen_blkif_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + max_page_order = xenbus_read_unsigned(info->xbdev->otherend, "max-ring-page-order", 0); ring_page_order = min(xen_blkif_max_ring_order, max_page_order); @@ -2283,17 +2293,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) if (err) goto out_of_memory; - if (!info->feature_persistent && info->max_indirect_segments) { + if (!info->bounce && info->max_indirect_segments) { /* - * We are using indirect descriptors but not persistent - * grants, we need to allocate a set of pages that can be + * We are using indirect descriptors but don't have a bounce + * buffer, we need to allocate a set of pages that can be * used for mapping indirect grefs */ int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); BUG_ON(!list_empty(&rinfo->indirect_pages)); for (i = 0; i < num; i++) { - struct page *indirect_page = alloc_page(GFP_KERNEL); + struct page *indirect_page = alloc_page(GFP_KERNEL | + __GFP_ZERO); if (!indirect_page) goto out_of_memory; list_add(&indirect_page->lru, &rinfo->indirect_pages); @@ -2386,6 +2397,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) info->feature_persistent = !!xenbus_read_unsigned(info->xbdev->otherend, "feature-persistent", 0); + if (info->feature_persistent) + info->bounce = true; indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, "feature-max-indirect-segments", 0); @@ -2759,6 +2772,13 @@ static void blkfront_delay_work(struct work_struct *work) struct blkfront_info *info; bool need_schedule_work = false; + /* + * Note that when using bounce buffers but not persistent grants + * there's no need to run blkfront_delay_work because grants are + * revoked in blkif_completion or else an error is reported and the + * connection is closed. + */ + mutex_lock(&blkfront_mutex); list_for_each_entry(info, &info_list, info_list) { diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 1b9743b7f2ef..d263eac784da 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -401,6 +401,8 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = { { 0x6606, "BCM4345C5" }, /* 003.006.006 */ { 0x230f, "BCM4356A2" }, /* 001.003.015 */ { 0x220e, "BCM20702A1" }, /* 001.002.014 */ + { 0x420d, "BCM4349B1" }, /* 002.002.013 */ + { 0x420e, "BCM4349B1" }, /* 002.002.014 */ { 0x4217, "BCM4329B1" }, /* 002.002.023 */ { 0x6106, "BCM4359C0" }, /* 003.001.006 */ { 0x4106, "BCM4335A0" }, /* 002.001.006 */ diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 538232b4c42a..a699e6166aef 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -399,6 +399,18 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0bda, 0xc822), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + /* Realtek 8852CE Bluetooth devices */ + { USB_DEVICE(0x04ca, 0x4007), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x04c5, 0x1675), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0cb8, 0xc558), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3587), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3586), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + /* Realtek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), .driver_info = BTUSB_REALTEK }, @@ -416,6 +428,9 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, /* Additional Realtek 8723AE Bluetooth devices */ { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK }, diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 3de0ce5386ac..b57e2e42a912 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -1490,8 +1490,10 @@ static const struct of_device_id bcm_bluetooth_of_match[] = { { .compatible = "brcm,bcm4345c5" }, { .compatible = "brcm,bcm4330-bt" }, { .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data }, + { .compatible = "brcm,bcm4349-bt", .data = &bcm43438_device_data }, { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data }, { .compatible = "brcm,bcm4335a0" }, + { .compatible = "infineon,cyw55572-bt" }, { }, }; MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index dc7ee5dd2eec..eea18aed17f8 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -689,9 +689,9 @@ static int qca_close(struct hci_uart *hu) skb_queue_purge(&qca->tx_wait_q); skb_queue_purge(&qca->txq); skb_queue_purge(&qca->rx_memdump_q); - del_timer(&qca->tx_idle_timer); - del_timer(&qca->wake_retrans_timer); destroy_workqueue(qca->workqueue); + del_timer_sync(&qca->tx_idle_timer); + del_timer_sync(&qca->wake_retrans_timer); qca->hu = NULL; kfree_skb(qca->rx_skb); diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index ac559c262033..4ee20be76508 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -3291,7 +3291,9 @@ static int sysc_remove(struct platform_device *pdev) struct sysc *ddata = platform_get_drvdata(pdev); int error; - cancel_delayed_work_sync(&ddata->idle_work); + /* Device can still be enabled, see deferred idle quirk in probe */ + if (cancel_delayed_work_sync(&ddata->idle_work)) + ti_sysc_idle(&ddata->idle_work.work); error = pm_runtime_get_sync(ddata->dev); if (error < 0) { diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index d229a2d0c017..b4e65d1ede26 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -471,28 +471,41 @@ config ADI and SSM (Silicon Secured Memory). Intended consumers of this driver include crash and makedumpfile. -endmenu - config RANDOM_TRUST_CPU - bool "Trust the CPU manufacturer to initialize Linux's CRNG" + bool "Initialize RNG using CPU RNG instructions" + default y depends on ARCH_RANDOM - default n help - Assume that CPU manufacturer (e.g., Intel or AMD for RDSEED or - RDRAND, IBM for the S390 and Power PC architectures) is trustworthy - for the purposes of initializing Linux's CRNG. Since this is not - something that can be independently audited, this amounts to trusting - that CPU manufacturer (perhaps with the insistence or mandate - of a Nation State's intelligence or law enforcement agencies) - has not installed a hidden back door to compromise the CPU's - random number generation facilities. This can also be configured - at boot with "random.trust_cpu=on/off". + Initialize the RNG using random numbers supplied by the CPU's + RNG instructions (e.g. RDRAND), if supported and available. These + random numbers are never used directly, but are rather hashed into + the main input pool, and this happens regardless of whether or not + this option is enabled. Instead, this option controls whether the + they are credited and hence can initialize the RNG. Additionally, + other sources of randomness are always used, regardless of this + setting. Enabling this implies trusting that the CPU can supply high + quality and non-backdoored random numbers. + + Say Y here unless you have reason to mistrust your CPU or believe + its RNG facilities may be faulty. This may also be configured at + boot time with "random.trust_cpu=on/off". config RANDOM_TRUST_BOOTLOADER - bool "Trust the bootloader to initialize Linux's CRNG" + bool "Initialize RNG using bootloader-supplied seed" + default y help - Some bootloaders can provide entropy to increase the kernel's initial - device randomness. Say Y here to assume the entropy provided by the - booloader is trustworthy so it will be added to the kernel's entropy - pool. Otherwise, say N here so it will be regarded as device input that - only mixes the entropy pool. + Initialize the RNG using a seed supplied by the bootloader or boot + environment (e.g. EFI or a bootloader-generated device tree). This + seed is not used directly, but is rather hashed into the main input + pool, and this happens regardless of whether or not this option is + enabled. Instead, this option controls whether the seed is credited + and hence can initialize the RNG. Additionally, other sources of + randomness are always used, regardless of this setting. Enabling + this implies trusting that the bootloader can supply high quality and + non-backdoored seeds. + + Say Y here unless you have reason to mistrust your bootloader or + believe its RNG facilities may be faulty. This may also be configured + at boot time with "random.trust_bootloader=on/off". + +endmenu diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 8c1c47dd9f46..5749998feaa4 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index e0d77fa048fb..f06e4f95114f 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -92,7 +92,7 @@ static int __maybe_unused omap_rom_rng_runtime_resume(struct device *dev) r = ddata->rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT); if (r != 0) { - clk_disable(ddata->clk); + clk_disable_unprepare(ddata->clk); dev_err(dev, "HW init failed: %d\n", r); return -EIO; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 8f147274f826..05e7339752ac 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -11,8 +11,8 @@ * Copyright 2002 MontaVista Software Inc. */ -#define pr_fmt(fmt) "%s" fmt, "IPMI message handler: " -#define dev_fmt pr_fmt +#define pr_fmt(fmt) "IPMI message handler: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include #include diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 3de679723648..477139749513 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -840,6 +840,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, break; case SSIF_GETTING_EVENTS: + if (!msg) { + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "No message set while getting events\n"); + ipmi_ssif_unlock_cond(ssif_info, flags); + break; + } + if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) { /* Error getting event, probably done. */ msg->done(msg); @@ -864,6 +872,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, break; case SSIF_GETTING_MESSAGES: + if (!msg) { + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "No message set while getting messages\n"); + ipmi_ssif_unlock_cond(ssif_info, flags); + break; + } + if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) { /* Error getting event, probably done. */ msg->done(msg); @@ -887,6 +903,13 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, deliver_recv_msg(ssif_info, msg); } break; + + default: + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "Invalid state in message done handling: %d\n", + ssif_info->ssif_state); + ipmi_ssif_unlock_cond(ssif_info, flags); } flags = ipmi_ssif_lock_cond(ssif_info, &oflags); diff --git a/drivers/char/random.c b/drivers/char/random.c index 5f541c946559..9b3f695e2e76 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1,310 +1,26 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * random.c -- A strong random number generator - * - * Copyright (C) 2017 Jason A. Donenfeld . All - * Rights Reserved. - * + * Copyright (C) 2017-2022 Jason A. Donenfeld . All Rights Reserved. * Copyright Matt Mackall , 2003, 2004, 2005 + * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved. * - * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All - * rights reserved. + * This driver produces cryptographically secure pseudorandom data. It is divided + * into roughly six sections, each with a section header: * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, and the entire permission notice in its entirety, - * including the disclaimer of warranties. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior - * written permission. + * - Initialization and readiness waiting. + * - Fast key erasure RNG, the "crng". + * - Entropy accumulation and extraction routines. + * - Entropy collection routines. + * - Userspace reader/writer interfaces. + * - Sysctl interface. * - * ALTERNATIVELY, this product may be distributed under the terms of - * the GNU General Public License, in which case the provisions of the GPL are - * required INSTEAD OF the above restrictions. (This clause is - * necessary due to a potential bad interaction between the GPL and - * the restrictions contained in a BSD-style copyright.) - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF - * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT - * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - */ - -/* - * (now, with legal B.S. out of the way.....) - * - * This routine gathers environmental noise from device drivers, etc., - * and returns good random numbers, suitable for cryptographic use. - * Besides the obvious cryptographic uses, these numbers are also good - * for seeding TCP sequence numbers, and other places where it is - * desirable to have numbers which are not only random, but hard to - * predict by an attacker. - * - * Theory of operation - * =================== - * - * Computers are very predictable devices. Hence it is extremely hard - * to produce truly random numbers on a computer --- as opposed to - * pseudo-random numbers, which can easily generated by using a - * algorithm. Unfortunately, it is very easy for attackers to guess - * the sequence of pseudo-random number generators, and for some - * applications this is not acceptable. So instead, we must try to - * gather "environmental noise" from the computer's environment, which - * must be hard for outside attackers to observe, and use that to - * generate random numbers. In a Unix environment, this is best done - * from inside the kernel. - * - * Sources of randomness from the environment include inter-keyboard - * timings, inter-interrupt timings from some interrupts, and other - * events which are both (a) non-deterministic and (b) hard for an - * outside observer to measure. Randomness from these sources are - * added to an "entropy pool", which is mixed using a CRC-like function. - * This is not cryptographically strong, but it is adequate assuming - * the randomness is not chosen maliciously, and it is fast enough that - * the overhead of doing it on every interrupt is very reasonable. - * As random bytes are mixed into the entropy pool, the routines keep - * an *estimate* of how many bits of randomness have been stored into - * the random number generator's internal state. - * - * When random bytes are desired, they are obtained by taking the SHA - * hash of the contents of the "entropy pool". The SHA hash avoids - * exposing the internal state of the entropy pool. It is believed to - * be computationally infeasible to derive any useful information - * about the input of SHA from its output. Even if it is possible to - * analyze SHA in some clever way, as long as the amount of data - * returned from the generator is less than the inherent entropy in - * the pool, the output data is totally unpredictable. For this - * reason, the routine decreases its internal estimate of how many - * bits of "true randomness" are contained in the entropy pool as it - * outputs random numbers. - * - * If this estimate goes to zero, the routine can still generate - * random numbers; however, an attacker may (at least in theory) be - * able to infer the future output of the generator from prior - * outputs. This requires successful cryptanalysis of SHA, which is - * not believed to be feasible, but there is a remote possibility. - * Nonetheless, these numbers should be useful for the vast majority - * of purposes. - * - * Exported interfaces ---- output - * =============================== - * - * There are four exported interfaces; two for use within the kernel, - * and two or use from userspace. - * - * Exported interfaces ---- userspace output - * ----------------------------------------- - * - * The userspace interfaces are two character devices /dev/random and - * /dev/urandom. /dev/random is suitable for use when very high - * quality randomness is desired (for example, for key generation or - * one-time pads), as it will only return a maximum of the number of - * bits of randomness (as estimated by the random number generator) - * contained in the entropy pool. - * - * The /dev/urandom device does not have this limit, and will return - * as many bytes as are requested. As more and more random bytes are - * requested without giving time for the entropy pool to recharge, - * this will result in random numbers that are merely cryptographically - * strong. For many applications, however, this is acceptable. - * - * Exported interfaces ---- kernel output - * -------------------------------------- - * - * The primary kernel interface is - * - * void get_random_bytes(void *buf, int nbytes); - * - * This interface will return the requested number of random bytes, - * and place it in the requested buffer. This is equivalent to a - * read from /dev/urandom. - * - * For less critical applications, there are the functions: - * - * u32 get_random_u32() - * u64 get_random_u64() - * unsigned int get_random_int() - * unsigned long get_random_long() - * - * These are produced by a cryptographic RNG seeded from get_random_bytes, - * and so do not deplete the entropy pool as much. These are recommended - * for most in-kernel operations *if the result is going to be stored in - * the kernel*. - * - * Specifically, the get_random_int() family do not attempt to do - * "anti-backtracking". If you capture the state of the kernel (e.g. - * by snapshotting the VM), you can figure out previous get_random_int() - * return values. But if the value is stored in the kernel anyway, - * this is not a problem. - * - * It *is* safe to expose get_random_int() output to attackers (e.g. as - * network cookies); given outputs 1..n, it's not feasible to predict - * outputs 0 or n+1. The only concern is an attacker who breaks into - * the kernel later; the get_random_int() engine is not reseeded as - * often as the get_random_bytes() one. - * - * get_random_bytes() is needed for keys that need to stay secret after - * they are erased from the kernel. For example, any key that will - * be wrapped and stored encrypted. And session encryption keys: we'd - * like to know that after the session is closed and the keys erased, - * the plaintext is unrecoverable to someone who recorded the ciphertext. - * - * But for network ports/cookies, stack canaries, PRNG seeds, address - * space layout randomization, session *authentication* keys, or other - * applications where the sensitive data is stored in the kernel in - * plaintext for as long as it's sensitive, the get_random_int() family - * is just fine. - * - * Consider ASLR. We want to keep the address space secret from an - * outside attacker while the process is running, but once the address - * space is torn down, it's of no use to an attacker any more. And it's - * stored in kernel data structures as long as it's alive, so worrying - * about an attacker's ability to extrapolate it from the get_random_int() - * CRNG is silly. - * - * Even some cryptographic keys are safe to generate with get_random_int(). - * In particular, keys for SipHash are generally fine. Here, knowledge - * of the key authorizes you to do something to a kernel object (inject - * packets to a network connection, or flood a hash table), and the - * key is stored with the object being protected. Once it goes away, - * we no longer care if anyone knows the key. - * - * prandom_u32() - * ------------- - * - * For even weaker applications, see the pseudorandom generator - * prandom_u32(), prandom_max(), and prandom_bytes(). If the random - * numbers aren't security-critical at all, these are *far* cheaper. - * Useful for self-tests, random error simulation, randomized backoffs, - * and any other application where you trust that nobody is trying to - * maliciously mess with you by guessing the "random" numbers. - * - * Exported interfaces ---- input - * ============================== - * - * The current exported interfaces for gathering environmental noise - * from the devices are: - * - * void add_device_randomness(const void *buf, unsigned int size); - * void add_input_randomness(unsigned int type, unsigned int code, - * unsigned int value); - * void add_interrupt_randomness(int irq, int irq_flags); - * void add_disk_randomness(struct gendisk *disk); - * - * add_device_randomness() is for adding data to the random pool that - * is likely to differ between two devices (or possibly even per boot). - * This would be things like MAC addresses or serial numbers, or the - * read-out of the RTC. This does *not* add any actual entropy to the - * pool, but it initializes the pool to different values for devices - * that might otherwise be identical and have very little entropy - * available to them (particularly common in the embedded world). - * - * add_input_randomness() uses the input layer interrupt timing, as well as - * the event type information from the hardware. - * - * add_interrupt_randomness() uses the interrupt timing as random - * inputs to the entropy pool. Using the cycle counters and the irq source - * as inputs, it feeds the randomness roughly once a second. - * - * add_disk_randomness() uses what amounts to the seek time of block - * layer request events, on a per-disk_devt basis, as input to the - * entropy pool. Note that high-speed solid state drives with very low - * seek times do not make for good sources of entropy, as their seek - * times are usually fairly consistent. - * - * All of these routines try to estimate how many bits of randomness a - * particular randomness source. They do this by keeping track of the - * first and second order deltas of the event timings. - * - * Ensuring unpredictability at system startup - * ============================================ - * - * When any operating system starts up, it will go through a sequence - * of actions that are fairly predictable by an adversary, especially - * if the start-up does not involve interaction with a human operator. - * This reduces the actual number of bits of unpredictability in the - * entropy pool below the value in entropy_count. In order to - * counteract this effect, it helps to carry information in the - * entropy pool across shut-downs and start-ups. To do this, put the - * following lines an appropriate script which is run during the boot - * sequence: - * - * echo "Initializing random number generator..." - * random_seed=/var/run/random-seed - * # Carry a random seed from start-up to start-up - * # Load and then save the whole entropy pool - * if [ -f $random_seed ]; then - * cat $random_seed >/dev/urandom - * else - * touch $random_seed - * fi - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * and the following lines in an appropriate script which is run as - * the system is shutdown: - * - * # Carry a random seed from shut-down to start-up - * # Save the whole entropy pool - * echo "Saving random seed..." - * random_seed=/var/run/random-seed - * touch $random_seed - * chmod 600 $random_seed - * dd if=/dev/urandom of=$random_seed count=1 bs=512 - * - * For example, on most modern systems using the System V init - * scripts, such code fragments would be found in - * /etc/rc.d/init.d/random. On older Linux systems, the correct script - * location might be in /etc/rcb.d/rc.local or /etc/rc.d/rc.0. - * - * Effectively, these commands cause the contents of the entropy pool - * to be saved at shut-down time and reloaded into the entropy pool at - * start-up. (The 'dd' in the addition to the bootup script is to - * make sure that /etc/random-seed is different for every start-up, - * even if the system crashes without executing rc.0.) Even with - * complete knowledge of the start-up activities, predicting the state - * of the entropy pool requires knowledge of the previous history of - * the system. - * - * Configuring the /dev/random driver under Linux - * ============================================== - * - * The /dev/random driver under Linux uses minor numbers 8 and 9 of - * the /dev/mem major number (#1). So if your system does not have - * /dev/random and /dev/urandom created already, they can be created - * by using the commands: - * - * mknod /dev/random c 1 8 - * mknod /dev/urandom c 1 9 - * - * Acknowledgements: - * ================= - * - * Ideas for constructing this random number generator were derived - * from Pretty Good Privacy's random number generator, and from private - * discussions with Phil Karn. Colin Plumb provided a faster random - * number generator, which speed up the mixing function of the entropy - * pool, taken from PGPfone. Dale Worley has also contributed many - * useful ideas and suggestions to improve this driver. - * - * Any flaws in the design are solely my responsibility, and should - * not be attributed to the Phil, Colin, or any of authors of PGP. - * - * Further background information on this topic may be obtained from - * RFC 1750, "Randomness Recommendations for Security", by Donald - * Eastlake, Steve Crocker, and Jeff Schiller. + * The high level overview is that there is one input pool, into which + * various pieces of data are hashed. Prior to initialization, some of that + * data is then "credited" as having a certain number of bits of entropy. + * When enough bits of entropy are available, the hash is finalized and + * handed as a key to a stream cipher that expands it indefinitely for + * various consumers. This key is periodically refreshed as the various + * entropy collectors, described below, add data to the input pool. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -327,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -335,804 +50,806 @@ #include #include #include -#include -#include - -#include #include +#include +#include +#include +#include +#include #include #include #include -#define CREATE_TRACE_POINTS -#include - -/* #define ADD_INTERRUPT_BENCH */ - -/* - * Configuration information - */ -#define INPUT_POOL_SHIFT 12 -#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) -#define OUTPUT_POOL_SHIFT 10 -#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5)) -#define EXTRACT_SIZE 10 - - -#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long)) - -/* - * To allow fractional bits to be tracked, the entropy_count field is - * denominated in units of 1/8th bits. - * - * 2*(ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in - * credit_entropy_bits() needs to be 64 bits wide. - */ -#define ENTROPY_SHIFT 3 -#define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT) - -/* - * If the entropy count falls under this number of bits, then we - * should wake up processes which are selecting or polling on write - * access to /dev/random. - */ -static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; - -/* - * Originally, we used a primitive polynomial of degree .poolwords - * over GF(2). The taps for various sizes are defined below. They - * were chosen to be evenly spaced except for the last tap, which is 1 - * to get the twisting happening as fast as possible. - * - * For the purposes of better mixing, we use the CRC-32 polynomial as - * well to make a (modified) twisted Generalized Feedback Shift - * Register. (See M. Matsumoto & Y. Kurita, 1992. Twisted GFSR - * generators. ACM Transactions on Modeling and Computer Simulation - * 2(3):179-194. Also see M. Matsumoto & Y. Kurita, 1994. Twisted - * GFSR generators II. ACM Transactions on Modeling and Computer - * Simulation 4:254-266) - * - * Thanks to Colin Plumb for suggesting this. - * - * The mixing operation is much less sensitive than the output hash, - * where we use SHA-1. All that we want of mixing operation is that - * it be a good non-cryptographic hash; i.e. it not produce collisions - * when fed "random" data of the sort we expect to see. As long as - * the pool state differs for different inputs, we have preserved the - * input entropy and done a good job. The fact that an intelligent - * attacker can construct inputs that will produce controlled - * alterations to the pool's state is not important because we don't - * consider such inputs to contribute any randomness. The only - * property we need with respect to them is that the attacker can't - * increase his/her knowledge of the pool's state. Since all - * additions are reversible (knowing the final state and the input, - * you can reconstruct the initial state), if an attacker has any - * uncertainty about the initial state, he/she can only shuffle that - * uncertainty about, but never cause any collisions (which would - * decrease the uncertainty). - * - * Our mixing functions were analyzed by Lacharme, Roeck, Strubel, and - * Videau in their paper, "The Linux Pseudorandom Number Generator - * Revisited" (see: http://eprint.iacr.org/2012/251.pdf). In their - * paper, they point out that we are not using a true Twisted GFSR, - * since Matsumoto & Kurita used a trinomial feedback polynomial (that - * is, with only three taps, instead of the six that we are using). - * As a result, the resulting polynomial is neither primitive nor - * irreducible, and hence does not have a maximal period over - * GF(2**32). They suggest a slight change to the generator - * polynomial which improves the resulting TGFSR polynomial to be - * irreducible, which we have made here. - */ -static const struct poolinfo { - int poolbitshift, poolwords, poolbytes, poolfracbits; -#define S(x) ilog2(x)+5, (x), (x)*4, (x) << (ENTROPY_SHIFT+5) - int tap1, tap2, tap3, tap4, tap5; -} poolinfo_table[] = { - /* was: x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 */ - /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ - { S(128), 104, 76, 51, 25, 1 }, -}; - -/* - * Static global variables - */ -static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -static struct fasync_struct *fasync; - -static DEFINE_SPINLOCK(random_ready_list_lock); -static LIST_HEAD(random_ready_list); - -struct crng_state { - __u32 state[16]; - unsigned long init_time; - spinlock_t lock; -}; - -static struct crng_state primary_crng = { - .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), -}; - -/* - * crng_init = 0 --> Uninitialized - * 1 --> Initialized - * 2 --> Initialized from input_pool - * - * crng_init is protected by primary_crng->lock, and only increases - * its value (from 0->1->2). - */ -static int crng_init = 0; -static bool crng_need_final_init = false; -#define crng_ready() (likely(crng_init > 1)) -static int crng_init_cnt = 0; -static unsigned long crng_global_init_time = 0; -#define CRNG_INIT_CNT_THRESH (2*CHACHA_KEY_SIZE) -static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]); -static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA_BLOCK_SIZE], int used); -static void process_random_ready_list(void); -static void _get_random_bytes(void *buf, int nbytes); - -static struct ratelimit_state unseeded_warning = - RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); -static struct ratelimit_state urandom_warning = - RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); - -static int ratelimit_disable __read_mostly; - -module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); -MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); - -/********************************************************************** - * - * OS independent entropy store. Here are the functions which handle - * storing entropy in an entropy pool. - * - **********************************************************************/ - -struct entropy_store; -struct entropy_store { - /* read-only data: */ - const struct poolinfo *poolinfo; - __u32 *pool; - const char *name; - - /* read-write data: */ - spinlock_t lock; - unsigned short add_ptr; - unsigned short input_rotate; - int entropy_count; - unsigned int initialized:1; - unsigned int last_data_init:1; - __u8 last_data[EXTRACT_SIZE]; -}; - -static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int rsvd); -static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int fips); - -static void crng_reseed(struct crng_state *crng, struct entropy_store *r); -static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; - -static struct entropy_store input_pool = { - .poolinfo = &poolinfo_table[0], - .name = "input", - .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), - .pool = input_pool_data -}; - -static __u32 const twist_table[8] = { - 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, - 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; - -/* - * This function adds bytes into the entropy "pool". It does not - * update the entropy estimate. The caller should call - * credit_entropy_bits if this is appropriate. - * - * The pool is stirred with a primitive polynomial of the appropriate - * degree, and then twisted. We twist by three bits at a time because - * it's cheap to do so and helps slightly in the expected case where - * the entropy is concentrated in the low-order bits. - */ -static void _mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) -{ - unsigned long i, tap1, tap2, tap3, tap4, tap5; - int input_rotate; - int wordmask = r->poolinfo->poolwords - 1; - const char *bytes = in; - __u32 w; - - tap1 = r->poolinfo->tap1; - tap2 = r->poolinfo->tap2; - tap3 = r->poolinfo->tap3; - tap4 = r->poolinfo->tap4; - tap5 = r->poolinfo->tap5; - - input_rotate = r->input_rotate; - i = r->add_ptr; - - /* mix one byte at a time to simplify size handling and churn faster */ - while (nbytes--) { - w = rol32(*bytes++, input_rotate); - i = (i - 1) & wordmask; - - /* XOR in the various taps */ - w ^= r->pool[i]; - w ^= r->pool[(i + tap1) & wordmask]; - w ^= r->pool[(i + tap2) & wordmask]; - w ^= r->pool[(i + tap3) & wordmask]; - w ^= r->pool[(i + tap4) & wordmask]; - w ^= r->pool[(i + tap5) & wordmask]; - - /* Mix the result back in with a twist */ - r->pool[i] = (w >> 3) ^ twist_table[w & 7]; - - /* - * Normally, we add 7 bits of rotation to the pool. - * At the beginning of the pool, add an extra 7 bits - * rotation, so that successive passes spread the - * input bits across the pool evenly. - */ - input_rotate = (input_rotate + (i ? 7 : 14)) & 31; - } - - r->input_rotate = input_rotate; - r->add_ptr = i; -} - -static void __mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) -{ - trace_mix_pool_bytes_nolock(r->name, nbytes, _RET_IP_); - _mix_pool_bytes(r, in, nbytes); -} - -static void mix_pool_bytes(struct entropy_store *r, const void *in, - int nbytes) -{ - unsigned long flags; - - trace_mix_pool_bytes(r->name, nbytes, _RET_IP_); - spin_lock_irqsave(&r->lock, flags); - _mix_pool_bytes(r, in, nbytes); - spin_unlock_irqrestore(&r->lock, flags); -} - -struct fast_pool { - __u32 pool[4]; - unsigned long last; - unsigned short reg_idx; - unsigned char count; -}; - -/* - * This is a fast mixing routine used by the interrupt randomness - * collector. It's hardcoded for an 128 bit pool and assumes that any - * locks that might be needed are taken by the caller. - */ -static void fast_mix(struct fast_pool *f) -{ - __u32 a = f->pool[0], b = f->pool[1]; - __u32 c = f->pool[2], d = f->pool[3]; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 6); d = rol32(d, 27); - d ^= a; b ^= c; - - a += b; c += d; - b = rol32(b, 16); d = rol32(d, 14); - d ^= a; b ^= c; - - f->pool[0] = a; f->pool[1] = b; - f->pool[2] = c; f->pool[3] = d; - f->count++; -} - -static void process_random_ready_list(void) -{ - unsigned long flags; - struct random_ready_callback *rdy, *tmp; - - spin_lock_irqsave(&random_ready_list_lock, flags); - list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) { - struct module *owner = rdy->owner; - - list_del_init(&rdy->list); - rdy->func(rdy); - module_put(owner); - } - spin_unlock_irqrestore(&random_ready_list_lock, flags); -} - -/* - * Credit (or debit) the entropy store with n bits of entropy. - * Use credit_entropy_bits_safe() if the value comes from userspace - * or otherwise should be checked for extreme values. - */ -static void credit_entropy_bits(struct entropy_store *r, int nbits) -{ - int entropy_count, orig, has_initialized = 0; - const int pool_size = r->poolinfo->poolfracbits; - int nfrac = nbits << ENTROPY_SHIFT; - - if (!nbits) - return; - -retry: - entropy_count = orig = READ_ONCE(r->entropy_count); - if (nfrac < 0) { - /* Debit */ - entropy_count += nfrac; - } else { - /* - * Credit: we have to account for the possibility of - * overwriting already present entropy. Even in the - * ideal case of pure Shannon entropy, new contributions - * approach the full value asymptotically: - * - * entropy <- entropy + (pool_size - entropy) * - * (1 - exp(-add_entropy/pool_size)) - * - * For add_entropy <= pool_size/2 then - * (1 - exp(-add_entropy/pool_size)) >= - * (add_entropy/pool_size)*0.7869... - * so we can approximate the exponential with - * 3/4*add_entropy/pool_size and still be on the - * safe side by adding at most pool_size/2 at a time. - * - * The use of pool_size-2 in the while statement is to - * prevent rounding artifacts from making the loop - * arbitrarily long; this limits the loop to log2(pool_size)*2 - * turns no matter how large nbits is. - */ - int pnfrac = nfrac; - const int s = r->poolinfo->poolbitshift + ENTROPY_SHIFT + 2; - /* The +2 corresponds to the /4 in the denominator */ - - do { - unsigned int anfrac = min(pnfrac, pool_size/2); - unsigned int add = - ((pool_size - entropy_count)*anfrac*3) >> s; - - entropy_count += add; - pnfrac -= anfrac; - } while (unlikely(entropy_count < pool_size-2 && pnfrac)); - } - - if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy/overflow: pool %s count %d\n", - r->name, entropy_count); - entropy_count = 0; - } else if (entropy_count > pool_size) - entropy_count = pool_size; - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) - goto retry; - - if (has_initialized) { - r->initialized = 1; - kill_fasync(&fasync, SIGIO, POLL_IN); - } - - trace_credit_entropy_bits(r->name, nbits, - entropy_count >> ENTROPY_SHIFT, _RET_IP_); - - if (r == &input_pool) { - int entropy_bits = entropy_count >> ENTROPY_SHIFT; - - if (crng_init < 2) { - if (entropy_bits < 128) - return; - crng_reseed(&primary_crng, r); - entropy_bits = ENTROPY_BITS(r); - } - } -} - -static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) -{ - const int nbits_max = r->poolinfo->poolwords * 32; - - if (nbits < 0) - return -EINVAL; - - /* Cap the value to avoid overflows */ - nbits = min(nbits, nbits_max); - - credit_entropy_bits(r, nbits); - return 0; -} +// GKI: Keep this header to retain the original CRC that previously used the +// random.h tracepoints. +#include /********************************************************************* * - * CRNG using CHACHA20 + * Initialization and readiness waiting. + * + * Much of the RNG infrastructure is devoted to various dependencies + * being able to wait until the RNG has collected enough entropy and + * is ready for safe consumption. * *********************************************************************/ -#define CRNG_RESEED_INTERVAL (300*HZ) - -static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); - -#ifdef CONFIG_NUMA /* - * Hack to deal with crazy userspace progams when they are all trying - * to access /dev/urandom in parallel. The programs are almost - * certainly doing something terribly wrong, but we'll work around - * their brain damage. + * crng_init is protected by base_crng->lock, and only increases + * its value (from empty->early->ready). */ -static struct crng_state **crng_node_pool __read_mostly; +static enum { + CRNG_EMPTY = 0, /* Little to no entropy collected */ + CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */ + CRNG_READY = 2 /* Fully initialized with POOL_READY_BITS collected */ +} crng_init __read_mostly = CRNG_EMPTY; +#define crng_ready() (likely(crng_init >= CRNG_READY)) +/* Various types of waiters for crng_init->CRNG_READY transition. */ +static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); +static struct fasync_struct *fasync; +static DEFINE_SPINLOCK(random_ready_chain_lock); +static RAW_NOTIFIER_HEAD(random_ready_chain); + +/* Control how we warn userspace. */ +static struct ratelimit_state urandom_warning = + RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); +static int ratelimit_disable __read_mostly = + IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); +module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); +MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); + +/* + * Returns whether or not the input pool has been seeded and thus guaranteed + * to supply cryptographically secure random numbers. This applies to: the + * /dev/urandom device, the get_random_bytes function, and the get_random_{u32, + * ,u64,int,long} family of functions. + * + * Returns: true if the input pool has been seeded. + * false if the input pool has not been seeded. + */ +bool rng_is_initialized(void) +{ + return crng_ready(); +} +EXPORT_SYMBOL(rng_is_initialized); + +/* Used by wait_for_random_bytes(), and considered an entropy collector, below. */ +static void try_to_generate_entropy(void); + +/* + * Wait for the input pool to be seeded and thus guaranteed to supply + * cryptographically secure random numbers. This applies to: the /dev/urandom + * device, the get_random_bytes function, and the get_random_{u32,u64,int,long} + * family of functions. Using any of these functions without first calling + * this function forfeits the guarantee of security. + * + * Returns: 0 if the input pool has been seeded. + * -ERESTARTSYS if the function was interrupted by a signal. + */ +int wait_for_random_bytes(void) +{ + while (!crng_ready()) { + int ret; + + try_to_generate_entropy(); + ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); + if (ret) + return ret > 0 ? 0 : ret; + } + return 0; +} +EXPORT_SYMBOL(wait_for_random_bytes); + +/* + * Add a callback function that will be invoked when the input + * pool is initialised. + * + * returns: 0 if callback is successfully added + * -EALREADY if pool is already initialised (callback not called) + */ +int __cold register_random_ready_notifier(struct notifier_block *nb) +{ + unsigned long flags; + int ret = -EALREADY; + + if (crng_ready()) + return ret; + + spin_lock_irqsave(&random_ready_chain_lock, flags); + if (!crng_ready()) + ret = raw_notifier_chain_register(&random_ready_chain, nb); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); + return ret; +} + +/* + * Delete a previously registered readiness callback function. + */ +int __cold unregister_random_ready_notifier(struct notifier_block *nb) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&random_ready_chain_lock, flags); + ret = raw_notifier_chain_unregister(&random_ready_chain, nb); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); + return ret; +} + +static void process_oldschool_random_ready_list(void); +static void __cold process_random_ready_list(void) +{ + unsigned long flags; + + spin_lock_irqsave(&random_ready_chain_lock, flags); + raw_notifier_call_chain(&random_ready_chain, 0, NULL); + spin_unlock_irqrestore(&random_ready_chain_lock, flags); + + process_oldschool_random_ready_list(); +} + +#define warn_unseeded_randomness() \ + if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ + printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ + __func__, (void *)_RET_IP_, crng_init) + + +/********************************************************************* + * + * Fast key erasure RNG, the "crng". + * + * These functions expand entropy from the entropy extractor into + * long streams for external consumption using the "fast key erasure" + * RNG described at . + * + * There are a few exported interfaces for use by other drivers: + * + * void get_random_bytes(void *buf, size_t len) + * u32 get_random_u32() + * u64 get_random_u64() + * unsigned int get_random_int() + * unsigned long get_random_long() + * + * These interfaces will return the requested number of random bytes + * into the given buffer or as a return value. This is equivalent to + * a read from /dev/urandom. The u32, u64, int, and long family of + * functions may be higher performance for one-off random integers, + * because they do a bit of buffering and do not invoke reseeding + * until the buffer is emptied. + * + *********************************************************************/ + +enum { + CRNG_RESEED_START_INTERVAL = HZ, + CRNG_RESEED_INTERVAL = 60 * HZ +}; + +static struct { + u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); + unsigned long birth; + unsigned long generation; + spinlock_t lock; +} base_crng = { + .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock) +}; + +struct crng { + u8 key[CHACHA_KEY_SIZE]; + unsigned long generation; + local_lock_t lock; +}; + +static DEFINE_PER_CPU(struct crng, crngs) = { + .generation = ULONG_MAX, + .lock = INIT_LOCAL_LOCK(crngs.lock), +}; + +/* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */ +static void extract_entropy(void *buf, size_t len); + +/* This extracts a new crng key from the input pool. */ +static void crng_reseed(void) +{ + unsigned long flags; + unsigned long next_gen; + u8 key[CHACHA_KEY_SIZE]; + + extract_entropy(key, sizeof(key)); + + /* + * We copy the new key into the base_crng, overwriting the old one, + * and update the generation counter. We avoid hitting ULONG_MAX, + * because the per-cpu crngs are initialized to ULONG_MAX, so this + * forces new CPUs that come online to always initialize. + */ + spin_lock_irqsave(&base_crng.lock, flags); + memcpy(base_crng.key, key, sizeof(base_crng.key)); + next_gen = base_crng.generation + 1; + if (next_gen == ULONG_MAX) + ++next_gen; + WRITE_ONCE(base_crng.generation, next_gen); + WRITE_ONCE(base_crng.birth, jiffies); + if (!crng_ready()) + crng_init = CRNG_READY; + spin_unlock_irqrestore(&base_crng.lock, flags); + memzero_explicit(key, sizeof(key)); +} + +/* + * This generates a ChaCha block using the provided key, and then + * immediately overwites that key with half the block. It returns + * the resultant ChaCha state to the user, along with the second + * half of the block containing 32 bytes of random data that may + * be used; random_data_len may not be greater than 32. + * + * The returned ChaCha state contains within it a copy of the old + * key value, at index 4, so the state should always be zeroed out + * immediately after using in order to maintain forward secrecy. + * If the state cannot be erased in a timely manner, then it is + * safer to set the random_data parameter to &chacha_state[4] so + * that this function overwrites it before returning. + */ +static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], + u32 chacha_state[CHACHA_STATE_WORDS], + u8 *random_data, size_t random_data_len) +{ + u8 first_block[CHACHA_BLOCK_SIZE]; + + BUG_ON(random_data_len > 32); + + chacha_init_consts(chacha_state); + memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); + memset(&chacha_state[12], 0, sizeof(u32) * 4); + chacha20_block(chacha_state, first_block); + + memcpy(key, first_block, CHACHA_KEY_SIZE); + memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); + memzero_explicit(first_block, sizeof(first_block)); +} + +/* + * Return whether the crng seed is considered to be sufficiently old + * that a reseeding is needed. This happens if the last reseeding + * was CRNG_RESEED_INTERVAL ago, or during early boot, at an interval + * proportional to the uptime. + */ +static bool crng_has_old_seed(void) +{ + static bool early_boot = true; + unsigned long interval = CRNG_RESEED_INTERVAL; + + if (unlikely(READ_ONCE(early_boot))) { + time64_t uptime = ktime_get_seconds(); + if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) + WRITE_ONCE(early_boot, false); + else + interval = max_t(unsigned int, CRNG_RESEED_START_INTERVAL, + (unsigned int)uptime / 2 * HZ); + } + return time_is_before_jiffies(READ_ONCE(base_crng.birth) + interval); +} + +/* + * This function returns a ChaCha state that you may use for generating + * random data. It also returns up to 32 bytes on its own of random data + * that may be used; random_data_len may not be greater than 32. + */ +static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], + u8 *random_data, size_t random_data_len) +{ + unsigned long flags; + struct crng *crng; + + BUG_ON(random_data_len > 32); + + /* + * For the fast path, we check whether we're ready, unlocked first, and + * then re-check once locked later. In the case where we're really not + * ready, we do fast key erasure with the base_crng directly, extracting + * when crng_init is CRNG_EMPTY. + */ + if (!crng_ready()) { + bool ready; + + spin_lock_irqsave(&base_crng.lock, flags); + ready = crng_ready(); + if (!ready) { + if (crng_init == CRNG_EMPTY) + extract_entropy(base_crng.key, sizeof(base_crng.key)); + crng_fast_key_erasure(base_crng.key, chacha_state, + random_data, random_data_len); + } + spin_unlock_irqrestore(&base_crng.lock, flags); + if (!ready) + return; + } + + /* + * If the base_crng is old enough, we reseed, which in turn bumps the + * generation counter that we check below. + */ + if (unlikely(crng_has_old_seed())) + crng_reseed(); + + local_lock_irqsave(&crngs.lock, flags); + crng = raw_cpu_ptr(&crngs); + + /* + * If our per-cpu crng is older than the base_crng, then it means + * somebody reseeded the base_crng. In that case, we do fast key + * erasure on the base_crng, and use its output as the new key + * for our per-cpu crng. This brings us up to date with base_crng. + */ + if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) { + spin_lock(&base_crng.lock); + crng_fast_key_erasure(base_crng.key, chacha_state, + crng->key, sizeof(crng->key)); + crng->generation = base_crng.generation; + spin_unlock(&base_crng.lock); + } + + /* + * Finally, when we've made it this far, our per-cpu crng has an up + * to date key, and we can do fast key erasure with it to produce + * some random data and a ChaCha state for the caller. All other + * branches of this function are "unlikely", so most of the time we + * should wind up here immediately. + */ + crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len); + local_unlock_irqrestore(&crngs.lock, flags); +} + +static void _get_random_bytes(void *buf, size_t len) +{ + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 tmp[CHACHA_BLOCK_SIZE]; + size_t first_block_len; + + if (!len) + return; + + first_block_len = min_t(size_t, 32, len); + crng_make_state(chacha_state, buf, first_block_len); + len -= first_block_len; + buf += first_block_len; + + while (len) { + if (len < CHACHA_BLOCK_SIZE) { + chacha20_block(chacha_state, tmp); + memcpy(buf, tmp, len); + memzero_explicit(tmp, sizeof(tmp)); + break; + } + + chacha20_block(chacha_state, buf); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + len -= CHACHA_BLOCK_SIZE; + buf += CHACHA_BLOCK_SIZE; + } + + memzero_explicit(chacha_state, sizeof(chacha_state)); +} + +/* + * This function is the exported kernel interface. It returns some + * number of good random numbers, suitable for key generation, seeding + * TCP sequence numbers, etc. It does not rely on the hardware random + * number generator. For random bytes direct from the hardware RNG + * (when available), use get_random_bytes_arch(). In order to ensure + * that the randomness provided by this function is okay, the function + * wait_for_random_bytes() should be called and return 0 at least once + * at any point prior. + */ +void get_random_bytes(void *buf, int len) +{ + warn_unseeded_randomness(); + _get_random_bytes(buf, len); +} +EXPORT_SYMBOL(get_random_bytes); + +static ssize_t get_random_bytes_user(struct iov_iter *iter) +{ + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 block[CHACHA_BLOCK_SIZE]; + size_t ret = 0, copied; + + if (unlikely(!iov_iter_count(iter))) + return 0; + + /* + * Immediately overwrite the ChaCha key at index 4 with random + * bytes, in case userspace causes copy_to_iter() below to sleep + * forever, so that we still retain forward secrecy in that case. + */ + crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); + /* + * However, if we're doing a read of len <= 32, we don't need to + * use chacha_state after, so we can simply return those bytes to + * the user directly. + */ + if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) { + ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter); + goto out_zero_chacha; + } + + for (;;) { + chacha20_block(chacha_state, block); + if (unlikely(chacha_state[12] == 0)) + ++chacha_state[13]; + + copied = copy_to_iter(block, sizeof(block), iter); + ret += copied; + if (!iov_iter_count(iter) || copied != sizeof(block)) + break; + + BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); + if (ret % PAGE_SIZE == 0) { + if (signal_pending(current)) + break; + cond_resched(); + } + } + + memzero_explicit(block, sizeof(block)); +out_zero_chacha: + memzero_explicit(chacha_state, sizeof(chacha_state)); + return ret ? ret : -EFAULT; +} + +/* + * Batched entropy returns random integers. The quality of the random + * number is good as /dev/urandom. In order to ensure that the randomness + * provided by this function is okay, the function wait_for_random_bytes() + * should be called and return 0 at least once at any point prior. + */ + +#define DEFINE_BATCHED_ENTROPY(type) \ +struct batch_ ##type { \ + /* \ + * We make this 1.5x a ChaCha block, so that we get the \ + * remaining 32 bytes from fast key erasure, plus one full \ + * block from the detached ChaCha state. We can increase \ + * the size of this later if needed so long as we keep the \ + * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. \ + */ \ + type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))]; \ + local_lock_t lock; \ + unsigned long generation; \ + unsigned int position; \ +}; \ + \ +static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = { \ + .lock = INIT_LOCAL_LOCK(batched_entropy_ ##type.lock), \ + .position = UINT_MAX \ +}; \ + \ +type get_random_ ##type(void) \ +{ \ + type ret; \ + unsigned long flags; \ + struct batch_ ##type *batch; \ + unsigned long next_gen; \ + \ + warn_unseeded_randomness(); \ + \ + if (!crng_ready()) { \ + _get_random_bytes(&ret, sizeof(ret)); \ + return ret; \ + } \ + \ + local_lock_irqsave(&batched_entropy_ ##type.lock, flags); \ + batch = raw_cpu_ptr(&batched_entropy_##type); \ + \ + next_gen = READ_ONCE(base_crng.generation); \ + if (batch->position >= ARRAY_SIZE(batch->entropy) || \ + next_gen != batch->generation) { \ + _get_random_bytes(batch->entropy, sizeof(batch->entropy)); \ + batch->position = 0; \ + batch->generation = next_gen; \ + } \ + \ + ret = batch->entropy[batch->position]; \ + batch->entropy[batch->position] = 0; \ + ++batch->position; \ + local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags); \ + return ret; \ +} \ +EXPORT_SYMBOL(get_random_ ##type); + +DEFINE_BATCHED_ENTROPY(u64) +DEFINE_BATCHED_ENTROPY(u32) + +#ifdef CONFIG_SMP +/* + * This function is called when the CPU is coming up, with entry + * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. + */ +int __cold random_prepare_cpu(unsigned int cpu) +{ + /* + * When the cpu comes back online, immediately invalidate both + * the per-cpu crng and all batches, so that we serve fresh + * randomness. + */ + per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX; + per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX; + per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX; + return 0; +} #endif -static void invalidate_batched_entropy(void); -static void numa_crng_init(void); +/* + * This function will use the architecture-specific hardware random + * number generator if it is available. It is not recommended for + * use. Use get_random_bytes() instead. It returns the number of + * bytes filled in. + */ +int __must_check get_random_bytes_arch(void *buf, int len) +{ + size_t left = len; + u8 *p = buf; + + while (left) { + unsigned long v; + size_t block_len = min_t(size_t, left, sizeof(unsigned long)); + + if (!arch_get_random_long(&v)) + break; + + memcpy(p, &v, block_len); + p += block_len; + left -= block_len; + } + + return len - left; +} +EXPORT_SYMBOL(get_random_bytes_arch); + + +/********************************************************************** + * + * Entropy accumulation and extraction routines. + * + * Callers may add entropy via: + * + * static void mix_pool_bytes(const void *buf, size_t len) + * + * After which, if added entropy should be credited: + * + * static void credit_init_bits(size_t bits) + * + * Finally, extract entropy via: + * + * static void extract_entropy(void *buf, size_t len) + * + **********************************************************************/ + +enum { + POOL_BITS = BLAKE2S_HASH_SIZE * 8, + POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */ + POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */ +}; + +static struct { + struct blake2s_state hash; + spinlock_t lock; + unsigned int init_bits; +} input_pool = { + .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), + BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, + BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, + .hash.outlen = BLAKE2S_HASH_SIZE, + .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), +}; + +static void _mix_pool_bytes(const void *buf, size_t len) +{ + blake2s_update(&input_pool.hash, buf, len); +} + +/* + * This function adds bytes into the input pool. It does not + * update the initialization bit counter; the caller should call + * credit_init_bits if this is appropriate. + */ +static void mix_pool_bytes(const void *buf, size_t len) +{ + unsigned long flags; + + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(buf, len); + spin_unlock_irqrestore(&input_pool.lock, flags); +} + +/* + * This is an HKDF-like construction for using the hashed collected entropy + * as a PRF key, that's then expanded block-by-block. + */ +static void extract_entropy(void *buf, size_t len) +{ + unsigned long flags; + u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; + struct { + unsigned long rdseed[32 / sizeof(long)]; + size_t counter; + } block; + size_t i; + + for (i = 0; i < ARRAY_SIZE(block.rdseed); ++i) { + if (!arch_get_random_seed_long(&block.rdseed[i]) && + !arch_get_random_long(&block.rdseed[i])) + block.rdseed[i] = random_get_entropy(); + } + + spin_lock_irqsave(&input_pool.lock, flags); + + /* seed = HASHPRF(last_key, entropy_input) */ + blake2s_final(&input_pool.hash, seed); + + /* next_key = HASHPRF(seed, RDSEED || 0) */ + block.counter = 0; + blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); + blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); + + spin_unlock_irqrestore(&input_pool.lock, flags); + memzero_explicit(next_key, sizeof(next_key)); + + while (len) { + i = min_t(size_t, len, BLAKE2S_HASH_SIZE); + /* output = HASHPRF(seed, RDSEED || ++counter) */ + ++block.counter; + blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); + len -= i; + buf += i; + } + + memzero_explicit(seed, sizeof(seed)); + memzero_explicit(&block, sizeof(block)); +} + +#define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits) + +static void __cold _credit_init_bits(size_t bits) +{ + unsigned int new, orig, add; + unsigned long flags; + + if (!bits) + return; + + add = min_t(size_t, bits, POOL_BITS); + + do { + orig = READ_ONCE(input_pool.init_bits); + new = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.init_bits, orig, new) != orig); + + if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { + crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */ + process_random_ready_list(); + wake_up_interruptible(&crng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); + pr_notice("crng init done\n"); + if (urandom_warning.missed) + pr_notice("%d urandom warning(s) missed due to ratelimiting\n", + urandom_warning.missed); + } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { + spin_lock_irqsave(&base_crng.lock, flags); + /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */ + if (crng_init == CRNG_EMPTY) { + extract_entropy(base_crng.key, sizeof(base_crng.key)); + crng_init = CRNG_EARLY; + } + spin_unlock_irqrestore(&base_crng.lock, flags); + } +} + + +/********************************************************************** + * + * Entropy collection routines. + * + * The following exported functions are used for pushing entropy into + * the above entropy accumulation routines: + * + * void add_device_randomness(const void *buf, size_t len); + * void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); + * void add_bootloader_randomness(const void *buf, size_t len); + * void add_interrupt_randomness(int irq); + * void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); + * void add_disk_randomness(struct gendisk *disk); + * + * add_device_randomness() adds data to the input pool that + * is likely to differ between two devices (or possibly even per boot). + * This would be things like MAC addresses or serial numbers, or the + * read-out of the RTC. This does *not* credit any actual entropy to + * the pool, but it initializes the pool to different values for devices + * that might otherwise be identical and have very little entropy + * available to them (particularly common in the embedded world). + * + * add_hwgenerator_randomness() is for true hardware RNGs, and will credit + * entropy as specified by the caller. If the entropy pool is full it will + * block until more entropy is needed. + * + * add_bootloader_randomness() is called by bootloader drivers, such as EFI + * and device tree, and credits its input depending on whether or not the + * configuration option CONFIG_RANDOM_TRUST_BOOTLOADER is set. + * + * add_interrupt_randomness() uses the interrupt timing as random + * inputs to the entropy pool. Using the cycle counters and the irq source + * as inputs, it feeds the input pool roughly once a second or after 64 + * interrupts, crediting 1 bit of entropy for whichever comes first. + * + * add_input_randomness() uses the input layer interrupt timing, as well + * as the event type information from the hardware. + * + * add_disk_randomness() uses what amounts to the seek time of block + * layer request events, on a per-disk_devt basis, as input to the + * entropy pool. Note that high-speed solid state drives with very low + * seek times do not make for good sources of entropy, as their seek + * times are usually fairly consistent. + * + * The last two routines try to estimate how many bits of entropy + * to credit. They do this by keeping track of the first and second + * order deltas of the event timings. + * + **********************************************************************/ static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER); static int __init parse_trust_cpu(char *arg) { return kstrtobool(arg, &trust_cpu); } +static int __init parse_trust_bootloader(char *arg) +{ + return kstrtobool(arg, &trust_bootloader); +} early_param("random.trust_cpu", parse_trust_cpu); +early_param("random.trust_bootloader", parse_trust_bootloader); -static bool crng_init_try_arch(struct crng_state *crng) +/* + * The first collection of entropy occurs at system boot while interrupts + * are still turned off. Here we push in latent entropy, RDSEED, a timestamp, + * utsname(), and the command line. Depending on the above configuration knob, + * RDSEED may be considered sufficient for initialization. Note that much + * earlier setup may already have pushed entropy into the input pool by the + * time we get here. + */ +int __init random_init(const char *command_line) { - int i; - bool arch_init = true; - unsigned long rv; + ktime_t now = ktime_get_real(); + unsigned int i, arch_bytes; + unsigned long entropy; - for (i = 4; i < 16; i++) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - crng->state[i] ^= rv; - } - - return arch_init; -} - -static bool __init crng_init_try_arch_early(struct crng_state *crng) -{ - int i; - bool arch_init = true; - unsigned long rv; - - for (i = 4; i < 16; i++) { - if (!arch_get_random_seed_long_early(&rv) && - !arch_get_random_long_early(&rv)) { - rv = random_get_entropy(); - arch_init = false; - } - crng->state[i] ^= rv; - } - - return arch_init; -} - -static void __maybe_unused crng_initialize_secondary(struct crng_state *crng) -{ - chacha_init_consts(crng->state); - _get_random_bytes(&crng->state[4], sizeof(__u32) * 12); - crng_init_try_arch(crng); - crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; -} - -static void __init crng_initialize_primary(struct crng_state *crng) -{ - chacha_init_consts(crng->state); - _extract_entropy(&input_pool, &crng->state[4], sizeof(__u32) * 12, 0); - if (crng_init_try_arch_early(crng) && trust_cpu) { - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; - pr_notice("crng done (trusting CPU's manufacturer)\n"); - } - crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; -} - -static void crng_finalize_init(struct crng_state *crng) -{ - if (crng != &primary_crng || crng_init >= 2) - return; - if (!system_wq) { - /* We can't call numa_crng_init until we have workqueues, - * so mark this for processing later. */ - crng_need_final_init = true; - return; - } - - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; - process_random_ready_list(); - wake_up_interruptible(&crng_init_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); - pr_notice("crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { - pr_notice("%d urandom warning(s) missed due to ratelimiting\n", - urandom_warning.missed); - urandom_warning.missed = 0; - } -} - -#ifdef CONFIG_NUMA -static void do_numa_crng_init(struct work_struct *work) -{ - int i; - struct crng_state *crng; - struct crng_state **pool; - - pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL); - for_each_online_node(i) { - crng = kmalloc_node(sizeof(struct crng_state), - GFP_KERNEL | __GFP_NOFAIL, i); - spin_lock_init(&crng->lock); - crng_initialize_secondary(crng); - pool[i] = crng; - } - /* pairs with READ_ONCE() in select_crng() */ - if (cmpxchg_release(&crng_node_pool, NULL, pool) != NULL) { - for_each_node(i) - kfree(pool[i]); - kfree(pool); - } -} - -static DECLARE_WORK(numa_crng_init_work, do_numa_crng_init); - -static void numa_crng_init(void) -{ - schedule_work(&numa_crng_init_work); -} - -static struct crng_state *select_crng(void) -{ - struct crng_state **pool; - int nid = numa_node_id(); - - /* pairs with cmpxchg_release() in do_numa_crng_init() */ - pool = READ_ONCE(crng_node_pool); - if (pool && pool[nid]) - return pool[nid]; - - return &primary_crng; -} -#else -static void numa_crng_init(void) {} - -static struct crng_state *select_crng(void) -{ - return &primary_crng; -} +#if defined(LATENT_ENTROPY_PLUGIN) + static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy; + _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed)); #endif -/* - * crng_fast_load() can be called by code in the interrupt service - * path. So we can't afford to dilly-dally. Returns the number of - * bytes processed from cp. - */ -static size_t crng_fast_load(const char *cp, size_t len) -{ - unsigned long flags; - char *p; - size_t ret = 0; - - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) - return 0; - if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); - return 0; - } - p = (unsigned char *) &primary_crng.state[4]; - while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { - p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; - cp++; crng_init_cnt++; len--; ret++; - } - spin_unlock_irqrestore(&primary_crng.lock, flags); - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - invalidate_batched_entropy(); - crng_init = 1; - pr_notice("fast init done\n"); - } - return ret; -} - -/* - * crng_slow_load() is called by add_device_randomness, which has two - * attributes. (1) We can't trust the buffer passed to it is - * guaranteed to be unpredictable (so it might not have any entropy at - * all), and (2) it doesn't have the performance constraints of - * crng_fast_load(). - * - * So we do something more comprehensive which is guaranteed to touch - * all of the primary_crng's state, and which uses a LFSR with a - * period of 255 as part of the mixing algorithm. Finally, we do - * *not* advance crng_init_cnt since buffer we may get may be something - * like a fixed DMI table (for example), which might very well be - * unique to the machine, but is otherwise unvarying. - */ -static int crng_slow_load(const char *cp, size_t len) -{ - unsigned long flags; - static unsigned char lfsr = 1; - unsigned char tmp; - unsigned i, max = CHACHA_KEY_SIZE; - const char * src_buf = cp; - char * dest_buf = (char *) &primary_crng.state[4]; - - if (!spin_trylock_irqsave(&primary_crng.lock, flags)) - return 0; - if (crng_init != 0) { - spin_unlock_irqrestore(&primary_crng.lock, flags); - return 0; - } - if (len > max) - max = len; - - for (i = 0; i < max ; i++) { - tmp = lfsr; - lfsr >>= 1; - if (tmp & 1) - lfsr ^= 0xE1; - tmp = dest_buf[i % CHACHA_KEY_SIZE]; - dest_buf[i % CHACHA_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; - lfsr += (tmp << 3) | (tmp >> 5); - } - spin_unlock_irqrestore(&primary_crng.lock, flags); - return 1; -} - -static void crng_reseed(struct crng_state *crng, struct entropy_store *r) -{ - unsigned long flags; - int i, num; - union { - __u8 block[CHACHA_BLOCK_SIZE]; - __u32 key[8]; - } buf; - - if (r) { - num = extract_entropy(r, &buf, 32, 16, 0); - if (num == 0) - return; - } else { - _extract_crng(&primary_crng, buf.block); - _crng_backtrack_protect(&primary_crng, buf.block, - CHACHA_KEY_SIZE); - } - spin_lock_irqsave(&crng->lock, flags); - for (i = 0; i < 8; i++) { - unsigned long rv; - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - crng->state[i+4] ^= buf.key[i] ^ rv; - } - memzero_explicit(&buf, sizeof(buf)); - WRITE_ONCE(crng->init_time, jiffies); - spin_unlock_irqrestore(&crng->lock, flags); - crng_finalize_init(crng); -} - -static void _extract_crng(struct crng_state *crng, - __u8 out[CHACHA_BLOCK_SIZE]) -{ - unsigned long v, flags, init_time; - - if (crng_ready()) { - init_time = READ_ONCE(crng->init_time); - if (time_after(READ_ONCE(crng_global_init_time), init_time) || - time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(crng, crng == &primary_crng ? - &input_pool : NULL); - } - spin_lock_irqsave(&crng->lock, flags); - if (arch_get_random_long(&v)) - crng->state[14] ^= v; - chacha20_block(&crng->state[0], out); - if (crng->state[12] == 0) - crng->state[13]++; - spin_unlock_irqrestore(&crng->lock, flags); -} - -static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) -{ - _extract_crng(select_crng(), out); -} - -/* - * Use the leftover bytes from the CRNG block output (if there is - * enough) to mutate the CRNG key to provide backtracking protection. - */ -static void _crng_backtrack_protect(struct crng_state *crng, - __u8 tmp[CHACHA_BLOCK_SIZE], int used) -{ - unsigned long flags; - __u32 *s, *d; - int i; - - used = round_up(used, sizeof(__u32)); - if (used + CHACHA_KEY_SIZE > CHACHA_BLOCK_SIZE) { - extract_crng(tmp); - used = 0; - } - spin_lock_irqsave(&crng->lock, flags); - s = (__u32 *) &tmp[used]; - d = &crng->state[4]; - for (i=0; i < 8; i++) - *d++ ^= *s++; - spin_unlock_irqrestore(&crng->lock, flags); -} - -static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) -{ - _crng_backtrack_protect(select_crng(), tmp, used); -} - -static ssize_t extract_crng_user(void __user *buf, size_t nbytes) -{ - ssize_t ret = 0, i = CHACHA_BLOCK_SIZE; - __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); - int large_request = (nbytes > 256); - - while (nbytes) { - if (large_request && need_resched()) { - if (signal_pending(current)) { - if (ret == 0) - ret = -ERESTARTSYS; - break; - } - schedule(); + for (i = 0, arch_bytes = BLAKE2S_BLOCK_SIZE; + i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) { + if (!arch_get_random_seed_long_early(&entropy) && + !arch_get_random_long_early(&entropy)) { + entropy = random_get_entropy(); + arch_bytes -= sizeof(entropy); } - - extract_crng(tmp); - i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, tmp, i)) { - ret = -EFAULT; - break; - } - - nbytes -= i; - buf += i; - ret += i; + _mix_pool_bytes(&entropy, sizeof(entropy)); } - crng_backtrack_protect(tmp, i); + _mix_pool_bytes(&now, sizeof(now)); + _mix_pool_bytes(utsname(), sizeof(*(utsname()))); + _mix_pool_bytes(command_line, strlen(command_line)); + add_latent_entropy(); - /* Wipe data just written to memory */ - memzero_explicit(tmp, sizeof(tmp)); + if (crng_ready()) + crng_reseed(); + else if (trust_cpu) + credit_init_bits(arch_bytes * 8); - return ret; + return 0; } - -/********************************************************************* - * - * Entropy input management - * - *********************************************************************/ - -/* There is one of these per entropy source */ -struct timer_rand_state { - cycles_t last_time; - long last_delta, last_delta2; -}; - -#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, }; - /* * Add device- or boot-specific data to the input pool to help * initialize it. @@ -1141,57 +858,208 @@ struct timer_rand_state { * the entropy pool having similar initial state across largely * identical devices. */ -void add_device_randomness(const void *buf, unsigned int size) +void add_device_randomness(const void *buf, unsigned int len) { - unsigned long time = random_get_entropy() ^ jiffies; + unsigned long entropy = random_get_entropy(); unsigned long flags; - if (!crng_ready() && size) - crng_slow_load(buf, size); - - trace_add_device_randomness(size, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&input_pool, buf, size); - _mix_pool_bytes(&input_pool, &time, sizeof(time)); + _mix_pool_bytes(&entropy, sizeof(entropy)); + _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); -static struct timer_rand_state input_timer_state = INIT_TIMER_RAND_STATE; +/* + * Interface for in-kernel drivers of true hardware RNGs. + * Those devices may produce endless random bits and will be throttled + * when our pool is full. + */ +void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy) +{ + mix_pool_bytes(buf, len); + credit_init_bits(entropy); + + /* + * Throttle writing to once every CRNG_RESEED_INTERVAL, unless + * we're not yet initialized. + */ + if (!kthread_should_stop() && crng_ready()) + schedule_timeout_interruptible(CRNG_RESEED_INTERVAL); +} +EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); + +/* + * Handle random seed passed by bootloader, and credit it if + * CONFIG_RANDOM_TRUST_BOOTLOADER is set. + */ +void __cold add_bootloader_randomness(const void *buf, size_t len) +{ + mix_pool_bytes(buf, len); + if (trust_bootloader) + credit_init_bits(len * 8); +} +EXPORT_SYMBOL_GPL(add_bootloader_randomness); + +struct fast_pool { + struct work_struct mix; + unsigned long pool[4]; + unsigned long last; + unsigned int count; +}; + +static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { +#ifdef CONFIG_64BIT +#define FASTMIX_PERM SIPHASH_PERMUTATION + .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 } +#else +#define FASTMIX_PERM HSIPHASH_PERMUTATION + .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 } +#endif +}; + +/* + * This is [Half]SipHash-1-x, starting from an empty key. Because + * the key is fixed, it assumes that its inputs are non-malicious, + * and therefore this has no security on its own. s represents the + * four-word SipHash state, while v represents a two-word input. + */ +static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2) +{ + s[3] ^= v1; + FASTMIX_PERM(s[0], s[1], s[2], s[3]); + s[0] ^= v1; + s[3] ^= v2; + FASTMIX_PERM(s[0], s[1], s[2], s[3]); + s[0] ^= v2; +} + +#ifdef CONFIG_SMP +/* + * This function is called when the CPU has just come online, with + * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. + */ +int __cold random_online_cpu(unsigned int cpu) +{ + /* + * During CPU shutdown and before CPU onlining, add_interrupt_ + * randomness() may schedule mix_interrupt_randomness(), and + * set the MIX_INFLIGHT flag. However, because the worker can + * be scheduled on a different CPU during this period, that + * flag will never be cleared. For that reason, we zero out + * the flag here, which runs just after workqueues are onlined + * for the CPU again. This also has the effect of setting the + * irq randomness count to zero so that new accumulated irqs + * are fresh. + */ + per_cpu_ptr(&irq_randomness, cpu)->count = 0; + return 0; +} +#endif + +static void mix_interrupt_randomness(struct work_struct *work) +{ + struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); + /* + * The size of the copied stack pool is explicitly 2 longs so that we + * only ever ingest half of the siphash output each time, retaining + * the other half as the next "key" that carries over. The entropy is + * supposed to be sufficiently dispersed between bits so on average + * we don't wind up "losing" some. + */ + unsigned long pool[2]; + unsigned int count; + + /* Check to see if we're running on the wrong CPU due to hotplug. */ + local_irq_disable(); + if (fast_pool != this_cpu_ptr(&irq_randomness)) { + local_irq_enable(); + return; + } + + /* + * Copy the pool to the stack so that the mixer always has a + * consistent view, before we reenable irqs again. + */ + memcpy(pool, fast_pool->pool, sizeof(pool)); + count = fast_pool->count; + fast_pool->count = 0; + fast_pool->last = jiffies; + local_irq_enable(); + + mix_pool_bytes(pool, sizeof(pool)); + credit_init_bits(max(1u, (count & U16_MAX) / 64)); + + memzero_explicit(pool, sizeof(pool)); +} + +void add_interrupt_randomness(int irq) +{ + enum { MIX_INFLIGHT = 1U << 31 }; + unsigned long entropy = random_get_entropy(); + struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); + struct pt_regs *regs = get_irq_regs(); + unsigned int new_count; + + fast_mix(fast_pool->pool, entropy, + (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq)); + new_count = ++fast_pool->count; + + if (new_count & MIX_INFLIGHT) + return; + + if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ)) + return; + + if (unlikely(!fast_pool->mix.func)) + INIT_WORK(&fast_pool->mix, mix_interrupt_randomness); + fast_pool->count |= MIX_INFLIGHT; + queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix); +} +EXPORT_SYMBOL_GPL(add_interrupt_randomness); + +/* There is one of these per entropy source */ +struct timer_rand_state { + unsigned long last_time; + long last_delta, last_delta2; +}; /* * This function adds entropy to the entropy "pool" by using timing - * delays. It uses the timer_rand_state structure to make an estimate - * of how many bits of entropy this call has added to the pool. - * - * The number "num" is also added to the pool - it should somehow describe - * the type of event which just happened. This is currently 0-255 for - * keyboard scan codes, and 256 upwards for interrupts. - * + * delays. It uses the timer_rand_state structure to make an estimate + * of how many bits of entropy this call has added to the pool. The + * value "num" is also added to the pool; it should somehow describe + * the type of event that just happened. */ -static void add_timer_randomness(struct timer_rand_state *state, unsigned num) +static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { - struct entropy_store *r; - struct { - long jiffies; - unsigned cycles; - unsigned num; - } sample; + unsigned long entropy = random_get_entropy(), now = jiffies, flags; long delta, delta2, delta3; + unsigned int bits; - sample.jiffies = jiffies; - sample.cycles = random_get_entropy(); - sample.num = num; - r = &input_pool; - mix_pool_bytes(r, &sample, sizeof(sample)); + /* + * If we're in a hard IRQ, add_interrupt_randomness() will be called + * sometime after, so mix into the fast pool. + */ + if (in_irq()) { + fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num); + } else { + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&entropy, sizeof(entropy)); + _mix_pool_bytes(&num, sizeof(num)); + spin_unlock_irqrestore(&input_pool.lock, flags); + } + + if (crng_ready()) + return; /* * Calculate number of bits of randomness we probably added. * We take into account the first, second and third-order deltas * in order to make our estimate. */ - delta = sample.jiffies - READ_ONCE(state->last_time); - WRITE_ONCE(state->last_time, sample.jiffies); + delta = now - READ_ONCE(state->last_time); + WRITE_ONCE(state->last_time, now); delta2 = delta - READ_ONCE(state->last_delta); WRITE_ONCE(state->last_delta, delta); @@ -1211,392 +1079,65 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) delta = delta3; /* - * delta is now minimum absolute delta. - * Round down by 1 bit on general principles, - * and limit entropy estimate to 12 bits. + * delta is now minimum absolute delta. Round down by 1 bit + * on general principles, and limit entropy estimate to 11 bits. */ - credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); + bits = min(fls(delta >> 1), 11); + + /* + * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness() + * will run after this, which uses a different crediting scheme of 1 bit + * per every 64 interrupts. In order to let that function do accounting + * close to the one in this function, we credit a full 64/64 bit per bit, + * and then subtract one to account for the extra one added. + */ + if (in_irq()) + this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1; + else + _credit_init_bits(bits); } -void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) +void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) { static unsigned char last_value; + static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; - /* ignore autorepeat and the like */ + /* Ignore autorepeat and the like. */ if (value == last_value) return; last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); - trace_add_input_randomness(ENTROPY_BITS(&input_pool)); } EXPORT_SYMBOL_GPL(add_input_randomness); -static DEFINE_PER_CPU(struct fast_pool, irq_randomness); - -#ifdef ADD_INTERRUPT_BENCH -static unsigned long avg_cycles, avg_deviation; - -#define AVG_SHIFT 8 /* Exponential average factor k=1/256 */ -#define FIXED_1_2 (1 << (AVG_SHIFT-1)) - -static void add_interrupt_bench(cycles_t start) -{ - long delta = random_get_entropy() - start; - - /* Use a weighted moving average */ - delta = delta - ((avg_cycles + FIXED_1_2) >> AVG_SHIFT); - avg_cycles += delta; - /* And average deviation */ - delta = abs(delta) - ((avg_deviation + FIXED_1_2) >> AVG_SHIFT); - avg_deviation += delta; -} -#else -#define add_interrupt_bench(x) -#endif - -static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) -{ - __u32 *ptr = (__u32 *) regs; - unsigned int idx; - - if (regs == NULL) - return 0; - idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(__u32)) - idx = 0; - ptr += idx++; - WRITE_ONCE(f->reg_idx, idx); - return *ptr; -} - -void add_interrupt_randomness(int irq, int irq_flags) -{ - struct entropy_store *r; - struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); - struct pt_regs *regs = get_irq_regs(); - unsigned long now = jiffies; - cycles_t cycles = random_get_entropy(); - __u32 c_high, j_high; - __u64 ip; - unsigned long seed; - int credit = 0; - - if (cycles == 0) - cycles = get_reg(fast_pool, regs); - c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; - j_high = (sizeof(now) > 4) ? now >> 32 : 0; - fast_pool->pool[0] ^= cycles ^ j_high ^ irq; - fast_pool->pool[1] ^= now ^ c_high; - ip = regs ? instruction_pointer(regs) : _RET_IP_; - fast_pool->pool[2] ^= ip; - fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : - get_reg(fast_pool, regs); - - fast_mix(fast_pool); - add_interrupt_bench(cycles); - - if (unlikely(crng_init == 0)) { - if ((fast_pool->count >= 64) && - crng_fast_load((char *) fast_pool->pool, - sizeof(fast_pool->pool)) > 0) { - fast_pool->count = 0; - fast_pool->last = now; - } - return; - } - - if ((fast_pool->count < 64) && - !time_after(now, fast_pool->last + HZ)) - return; - - r = &input_pool; - if (!spin_trylock(&r->lock)) - return; - - fast_pool->last = now; - __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool)); - - /* - * If we have architectural seed generator, produce a seed and - * add it to the pool. For the sake of paranoia don't let the - * architectural seed generator dominate the input from the - * interrupt noise. - */ - if (arch_get_random_seed_long(&seed)) { - __mix_pool_bytes(r, &seed, sizeof(seed)); - credit = 1; - } - spin_unlock(&r->lock); - - fast_pool->count = 0; - - /* award one bit for the contents of the fast pool */ - credit_entropy_bits(r, credit + 1); -} -EXPORT_SYMBOL_GPL(add_interrupt_randomness); - #ifdef CONFIG_BLOCK void add_disk_randomness(struct gendisk *disk) { if (!disk || !disk->random) return; - /* first major is 1, so we get >= 0x200 here */ + /* First major is 1, so we get >= 0x200 here. */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); - trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool)); } EXPORT_SYMBOL_GPL(add_disk_randomness); -#endif -/********************************************************************* - * - * Entropy extraction routines - * - *********************************************************************/ - -/* - * This function decides how many bytes to actually take from the - * given pool, and also debits the entropy count accordingly. - */ -static size_t account(struct entropy_store *r, size_t nbytes, int min, - int reserved) +void __cold rand_initialize_disk(struct gendisk *disk) { - int entropy_count, orig, have_bytes; - size_t ibytes, nfrac; - - BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); - - /* Can we pull enough? */ -retry: - entropy_count = orig = READ_ONCE(r->entropy_count); - ibytes = nbytes; - /* never pull more than available */ - have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); - - if ((have_bytes -= reserved) < 0) - have_bytes = 0; - ibytes = min_t(size_t, ibytes, have_bytes); - if (ibytes < min) - ibytes = 0; - - if (WARN_ON(entropy_count < 0)) { - pr_warn("negative entropy count: pool %s count %d\n", - r->name, entropy_count); - entropy_count = 0; - } - nfrac = ibytes << (ENTROPY_SHIFT + 3); - if ((size_t) entropy_count > nfrac) - entropy_count -= nfrac; - else - entropy_count = 0; - - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) - goto retry; - - trace_debit_entropy(r->name, 8 * ibytes); - if (ibytes && ENTROPY_BITS(r) < random_write_wakeup_bits) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } - - return ibytes; -} - -/* - * This function does the actual extraction for extract_entropy and - * extract_entropy_user. - * - * Note: we assume that .poolwords is a multiple of 16 words. - */ -static void extract_buf(struct entropy_store *r, __u8 *out) -{ - int i; - union { - __u32 w[5]; - unsigned long l[LONGS(20)]; - } hash; - __u32 workspace[SHA1_WORKSPACE_WORDS]; - unsigned long flags; + struct timer_rand_state *state; /* - * If we have an architectural hardware random number - * generator, use it for SHA's initial vector + * If kzalloc returns null, we just won't use that entropy + * source. */ - sha1_init(hash.w); - for (i = 0; i < LONGS(20); i++) { - unsigned long v; - if (!arch_get_random_long(&v)) - break; - hash.l[i] = v; + state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); + if (state) { + state->last_time = INITIAL_JIFFIES; + disk->random = state; } - - /* Generate a hash across the pool, 16 words (512 bits) at a time */ - spin_lock_irqsave(&r->lock, flags); - for (i = 0; i < r->poolinfo->poolwords; i += 16) - sha1_transform(hash.w, (__u8 *)(r->pool + i), workspace); - - /* - * We mix the hash back into the pool to prevent backtracking - * attacks (where the attacker knows the state of the pool - * plus the current outputs, and attempts to find previous - * ouputs), unless the hash function can be inverted. By - * mixing at least a SHA1 worth of hash data back, we make - * brute-forcing the feedback as hard as brute-forcing the - * hash. - */ - __mix_pool_bytes(r, hash.w, sizeof(hash.w)); - spin_unlock_irqrestore(&r->lock, flags); - - memzero_explicit(workspace, sizeof(workspace)); - - /* - * In case the hash function has some recognizable output - * pattern, we fold it in half. Thus, we always feed back - * twice as much data as we output. - */ - hash.w[0] ^= hash.w[3]; - hash.w[1] ^= hash.w[4]; - hash.w[2] ^= rol32(hash.w[2], 16); - - memcpy(out, &hash, EXTRACT_SIZE); - memzero_explicit(&hash, sizeof(hash)); } - -static ssize_t _extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int fips) -{ - ssize_t ret = 0, i; - __u8 tmp[EXTRACT_SIZE]; - unsigned long flags; - - while (nbytes) { - extract_buf(r, tmp); - - if (fips) { - spin_lock_irqsave(&r->lock, flags); - if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) - panic("Hardware RNG duplicated output!\n"); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - spin_unlock_irqrestore(&r->lock, flags); - } - i = min_t(int, nbytes, EXTRACT_SIZE); - memcpy(buf, tmp, i); - nbytes -= i; - buf += i; - ret += i; - } - - /* Wipe data just returned from memory */ - memzero_explicit(tmp, sizeof(tmp)); - - return ret; -} - -/* - * This function extracts randomness from the "entropy pool", and - * returns it in a buffer. - * - * The min parameter specifies the minimum amount we can pull before - * failing to avoid races that defeat catastrophic reseeding while the - * reserved parameter indicates how much entropy we must leave in the - * pool after each pull to avoid starving other readers. - */ -static ssize_t extract_entropy(struct entropy_store *r, void *buf, - size_t nbytes, int min, int reserved) -{ - __u8 tmp[EXTRACT_SIZE]; - unsigned long flags; - - /* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */ - if (fips_enabled) { - spin_lock_irqsave(&r->lock, flags); - if (!r->last_data_init) { - r->last_data_init = 1; - spin_unlock_irqrestore(&r->lock, flags); - trace_extract_entropy(r->name, EXTRACT_SIZE, - ENTROPY_BITS(r), _RET_IP_); - extract_buf(r, tmp); - spin_lock_irqsave(&r->lock, flags); - memcpy(r->last_data, tmp, EXTRACT_SIZE); - } - spin_unlock_irqrestore(&r->lock, flags); - } - - trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); - nbytes = account(r, nbytes, min, reserved); - - return _extract_entropy(r, buf, nbytes, fips_enabled); -} - -#define warn_unseeded_randomness(previous) \ - _warn_unseeded_randomness(__func__, (void *) _RET_IP_, (previous)) - -static void _warn_unseeded_randomness(const char *func_name, void *caller, - void **previous) -{ -#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM - const bool print_once = false; -#else - static bool print_once __read_mostly; #endif - if (print_once || - crng_ready() || - (previous && (caller == READ_ONCE(*previous)))) - return; - WRITE_ONCE(*previous, caller); -#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM - print_once = true; -#endif - if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS " - "with crng_init=%d\n", func_name, caller, - crng_init); -} - -/* - * This function is the exported kernel interface. It returns some - * number of good random numbers, suitable for key generation, seeding - * TCP sequence numbers, etc. It does not rely on the hardware random - * number generator. For random bytes direct from the hardware RNG - * (when available), use get_random_bytes_arch(). In order to ensure - * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once - * at any point prior. - */ -static void _get_random_bytes(void *buf, int nbytes) -{ - __u8 tmp[CHACHA_BLOCK_SIZE] __aligned(4); - - trace_get_random_bytes(nbytes, _RET_IP_); - - while (nbytes >= CHACHA_BLOCK_SIZE) { - extract_crng(buf); - buf += CHACHA_BLOCK_SIZE; - nbytes -= CHACHA_BLOCK_SIZE; - } - - if (nbytes > 0) { - extract_crng(tmp); - memcpy(buf, tmp, nbytes); - crng_backtrack_protect(tmp, nbytes); - } else - crng_backtrack_protect(tmp, CHACHA_BLOCK_SIZE); - memzero_explicit(tmp, sizeof(tmp)); -} - -void get_random_bytes(void *buf, int nbytes) -{ - static void *previous; - - warn_unseeded_randomness(&previous); - _get_random_bytes(buf, nbytes); -} -EXPORT_SYMBOL(get_random_bytes); - - /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another @@ -1610,93 +1151,396 @@ EXPORT_SYMBOL(get_random_bytes); * * So the re-arming always happens in the entropy loop itself. */ -static void entropy_timer(struct timer_list *t) +static void __cold entropy_timer(struct timer_list *t) { - credit_entropy_bits(&input_pool, 1); + credit_init_bits(1); } /* * If we have an actual cycle counter, see if we can * generate enough entropy with timing noise */ -static void try_to_generate_entropy(void) +static void __cold try_to_generate_entropy(void) { struct { - unsigned long now; + unsigned long entropy; struct timer_list timer; } stack; - stack.now = random_get_entropy(); + stack.entropy = random_get_entropy(); /* Slow counter - or none. Don't even bother */ - if (stack.now == random_get_entropy()) + if (stack.entropy == random_get_entropy()) return; timer_setup_on_stack(&stack.timer, entropy_timer, 0); - while (!crng_ready()) { + while (!crng_ready() && !signal_pending(current)) { if (!timer_pending(&stack.timer)) - mod_timer(&stack.timer, jiffies+1); - mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + mod_timer(&stack.timer, jiffies + 1); + mix_pool_bytes(&stack.entropy, sizeof(stack.entropy)); schedule(); - stack.now = random_get_entropy(); + stack.entropy = random_get_entropy(); } del_timer_sync(&stack.timer); destroy_timer_on_stack(&stack.timer); - mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + mix_pool_bytes(&stack.entropy, sizeof(stack.entropy)); } -/* - * Wait for the urandom pool to be seeded and thus guaranteed to supply - * cryptographically secure random numbers. This applies to: the /dev/urandom - * device, the get_random_bytes function, and the get_random_{u32,u64,int,long} - * family of functions. Using any of these functions without first calling - * this function forfeits the guarantee of security. + +/********************************************************************** * - * Returns: 0 if the urandom pool has been seeded. - * -ERESTARTSYS if the function was interrupted by a signal. - */ -int wait_for_random_bytes(void) + * Userspace reader/writer interfaces. + * + * getrandom(2) is the primary modern interface into the RNG and should + * be used in preference to anything else. + * + * Reading from /dev/random has the same functionality as calling + * getrandom(2) with flags=0. In earlier versions, however, it had + * vastly different semantics and should therefore be avoided, to + * prevent backwards compatibility issues. + * + * Reading from /dev/urandom has the same functionality as calling + * getrandom(2) with flags=GRND_INSECURE. Because it does not block + * waiting for the RNG to be ready, it should not be used. + * + * Writing to either /dev/random or /dev/urandom adds entropy to + * the input pool but does not credit it. + * + * Polling on /dev/random indicates when the RNG is initialized, on + * the read side, and when it wants new entropy, on the write side. + * + * Both /dev/random and /dev/urandom have the same set of ioctls for + * adding entropy, getting the entropy count, zeroing the count, and + * reseeding the crng. + * + **********************************************************************/ + +SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { - if (likely(crng_ready())) + struct iov_iter iter; + struct iovec iov; + int ret; + + if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) + return -EINVAL; + + /* + * Requesting insecure and blocking randomness at the same time makes + * no sense. + */ + if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) + return -EINVAL; + + if (!crng_ready() && !(flags & GRND_INSECURE)) { + if (flags & GRND_NONBLOCK) + return -EAGAIN; + ret = wait_for_random_bytes(); + if (unlikely(ret)) + return ret; + } + + ret = import_single_range(READ, ubuf, len, &iov, &iter); + if (unlikely(ret)) + return ret; + return get_random_bytes_user(&iter); +} + +static __poll_t random_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &crng_init_wait, wait); + return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; +} + +static ssize_t write_pool_user(struct iov_iter *iter) +{ + u8 block[BLAKE2S_BLOCK_SIZE]; + ssize_t ret = 0; + size_t copied; + + if (unlikely(!iov_iter_count(iter))) return 0; - do { - int ret; - ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); - if (ret) - return ret > 0 ? 0 : ret; + for (;;) { + copied = copy_from_iter(block, sizeof(block), iter); + ret += copied; + mix_pool_bytes(block, copied); + if (!iov_iter_count(iter) || copied != sizeof(block)) + break; - try_to_generate_entropy(); - } while (!crng_ready()); + BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); + if (ret % PAGE_SIZE == 0) { + if (signal_pending(current)) + break; + cond_resched(); + } + } - return 0; + memzero_explicit(block, sizeof(block)); + return ret ? ret : -EFAULT; } -EXPORT_SYMBOL(wait_for_random_bytes); -/* - * Returns whether or not the urandom pool has been seeded and thus guaranteed - * to supply cryptographically secure random numbers. This applies to: the - * /dev/urandom device, the get_random_bytes function, and the get_random_{u32, - * ,u64,int,long} family of functions. - * - * Returns: true if the urandom pool has been seeded. - * false if the urandom pool has not been seeded. - */ -bool rng_is_initialized(void) +static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter) { - return crng_ready(); + return write_pool_user(iter); } -EXPORT_SYMBOL(rng_is_initialized); + +static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter) +{ + static int maxwarn = 10; + + if (!crng_ready()) { + if (!ratelimit_disable && maxwarn <= 0) + ++urandom_warning.missed; + else if (ratelimit_disable || __ratelimit(&urandom_warning)) { + --maxwarn; + pr_notice("%s: uninitialized urandom read (%zu bytes read)\n", + current->comm, iov_iter_count(iter)); + } + } + + return get_random_bytes_user(iter); +} + +static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter) +{ + int ret; + + ret = wait_for_random_bytes(); + if (ret != 0) + return ret; + return get_random_bytes_user(iter); +} + +static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + int __user *p = (int __user *)arg; + int ent_count; + + switch (cmd) { + case RNDGETENTCNT: + /* Inherently racy, no point locking. */ + if (put_user(input_pool.init_bits, p)) + return -EFAULT; + return 0; + case RNDADDTOENTCNT: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (get_user(ent_count, p)) + return -EFAULT; + if (ent_count < 0) + return -EINVAL; + credit_init_bits(ent_count); + return 0; + case RNDADDENTROPY: { + struct iov_iter iter; + struct iovec iov; + ssize_t ret; + int len; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (get_user(ent_count, p++)) + return -EFAULT; + if (ent_count < 0) + return -EINVAL; + if (get_user(len, p++)) + return -EFAULT; + ret = import_single_range(WRITE, p, len, &iov, &iter); + if (unlikely(ret)) + return ret; + ret = write_pool_user(&iter); + if (unlikely(ret < 0)) + return ret; + /* Since we're crediting, enforce that it was all written into the pool. */ + if (unlikely(ret != len)) + return -EFAULT; + credit_init_bits(ent_count); + return 0; + } + case RNDZAPENTCNT: + case RNDCLEARPOOL: + /* No longer has any effect. */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return 0; + case RNDRESEEDCRNG: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!crng_ready()) + return -ENODATA; + crng_reseed(); + return 0; + default: + return -EINVAL; + } +} + +static int random_fasync(int fd, struct file *filp, int on) +{ + return fasync_helper(fd, filp, on, &fasync); +} + +const struct file_operations random_fops = { + .read_iter = random_read_iter, + .write_iter = random_write_iter, + .poll = random_poll, + .unlocked_ioctl = random_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .fasync = random_fasync, + .llseek = noop_llseek, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, +}; + +const struct file_operations urandom_fops = { + .read_iter = urandom_read_iter, + .write_iter = random_write_iter, + .unlocked_ioctl = random_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .fasync = random_fasync, + .llseek = noop_llseek, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, +}; + + +/******************************************************************** + * + * Sysctl interface. + * + * These are partly unused legacy knobs with dummy values to not break + * userspace and partly still useful things. They are usually accessible + * in /proc/sys/kernel/random/ and are as follows: + * + * - boot_id - a UUID representing the current boot. + * + * - uuid - a random UUID, different each time the file is read. + * + * - poolsize - the number of bits of entropy that the input pool can + * hold, tied to the POOL_BITS constant. + * + * - entropy_avail - the number of bits of entropy currently in the + * input pool. Always <= poolsize. + * + * - write_wakeup_threshold - the amount of entropy in the input pool + * below which write polls to /dev/random will unblock, requesting + * more entropy, tied to the POOL_READY_BITS constant. It is writable + * to avoid breaking old userspaces, but writing to it does not + * change any behavior of the RNG. + * + * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL. + * It is writable to avoid breaking old userspaces, but writing + * to it does not change any behavior of the RNG. + * + ********************************************************************/ + +#ifdef CONFIG_SYSCTL + +#include + +static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; +static int sysctl_random_write_wakeup_bits = POOL_READY_BITS; +static int sysctl_poolsize = POOL_BITS; +static u8 sysctl_bootid[UUID_SIZE]; /* - * Add a callback function that will be invoked when the nonblocking - * pool is initialised. - * - * returns: 0 if callback is successfully added - * -EALREADY if pool is already initialised (callback not called) - * -ENOENT if module for callback is not alive + * This function is used to return both the bootid UUID, and random + * UUID. The difference is in whether table->data is NULL; if it is, + * then a new UUID is generated and returned to the user. */ +static int proc_do_uuid(struct ctl_table *table, int write, void *buf, + size_t *lenp, loff_t *ppos) +{ + u8 tmp_uuid[UUID_SIZE], *uuid; + char uuid_string[UUID_STRING_LEN + 1]; + struct ctl_table fake_table = { + .data = uuid_string, + .maxlen = UUID_STRING_LEN + }; + + if (write) + return -EPERM; + + uuid = table->data; + if (!uuid) { + uuid = tmp_uuid; + generate_random_uuid(uuid); + } else { + static DEFINE_SPINLOCK(bootid_spinlock); + + spin_lock(&bootid_spinlock); + if (!uuid[8]) + generate_random_uuid(uuid); + spin_unlock(&bootid_spinlock); + } + + snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); + return proc_dostring(&fake_table, 0, buf, lenp, ppos); +} + +/* The same as proc_dointvec, but writes don't change anything. */ +static int proc_do_rointvec(struct ctl_table *table, int write, void *buf, + size_t *lenp, loff_t *ppos) +{ + return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos); +} + +extern struct ctl_table random_table[]; +struct ctl_table random_table[] = { + { + .procname = "poolsize", + .data = &sysctl_poolsize, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "entropy_avail", + .data = &input_pool.init_bits, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "write_wakeup_threshold", + .data = &sysctl_random_write_wakeup_bits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_do_rointvec, + }, + { + .procname = "urandom_min_reseed_secs", + .data = &sysctl_random_min_urandom_seed, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_do_rointvec, + }, + { + .procname = "boot_id", + .data = &sysctl_bootid, + .mode = 0444, + .proc_handler = proc_do_uuid, + }, + { + .procname = "uuid", + .mode = 0444, + .proc_handler = proc_do_uuid, + }, + { } +}; +#endif /* CONFIG_SYSCTL */ + +/* + * Android KABI fixups + * + * Add back two functions that were being used by out-of-tree drivers. + * + * Yes, horrible hack, the things we do for FIPS "compliance"... + */ +static DEFINE_SPINLOCK(random_ready_list_lock); +static LIST_HEAD(random_ready_list); + int add_random_ready_callback(struct random_ready_callback *rdy) { struct module *owner; @@ -1728,9 +1572,6 @@ int add_random_ready_callback(struct random_ready_callback *rdy) } EXPORT_SYMBOL(add_random_ready_callback); -/* - * Delete a previously registered readiness callback function. - */ void del_random_ready_callback(struct random_ready_callback *rdy) { unsigned long flags; @@ -1747,612 +1588,18 @@ void del_random_ready_callback(struct random_ready_callback *rdy) } EXPORT_SYMBOL(del_random_ready_callback); -/* - * This function will use the architecture-specific hardware random - * number generator if it is available. The arch-specific hw RNG will - * almost certainly be faster than what we can do in software, but it - * is impossible to verify that it is implemented securely (as - * opposed, to, say, the AES encryption of a sequence number using a - * key known by the NSA). So it's useful if we need the speed, but - * only if we're willing to trust the hardware manufacturer not to - * have put in a back door. - * - * Return number of bytes filled in. - */ -int __must_check get_random_bytes_arch(void *buf, int nbytes) -{ - int left = nbytes; - char *p = buf; - - trace_get_random_bytes_arch(left, _RET_IP_); - while (left) { - unsigned long v; - int chunk = min_t(int, left, sizeof(unsigned long)); - - if (!arch_get_random_long(&v)) - break; - - memcpy(p, &v, chunk); - p += chunk; - left -= chunk; - } - - return nbytes - left; -} -EXPORT_SYMBOL(get_random_bytes_arch); - -/* - * init_std_data - initialize pool with system data - * - * @r: pool to initialize - * - * This function clears the pool's entropy count and mixes some system - * data into the pool to prepare it for use. The pool is not cleared - * as that can only decrease the entropy in the pool. - */ -static void __init init_std_data(struct entropy_store *r) -{ - int i; - ktime_t now = ktime_get_real(); - unsigned long rv; - - mix_pool_bytes(r, &now, sizeof(now)); - for (i = r->poolinfo->poolbytes; i > 0; i -= sizeof(rv)) { - if (!arch_get_random_seed_long(&rv) && - !arch_get_random_long(&rv)) - rv = random_get_entropy(); - mix_pool_bytes(r, &rv, sizeof(rv)); - } - mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); -} - -/* - * Note that setup_arch() may call add_device_randomness() - * long before we get here. This allows seeding of the pools - * with some platform dependent data very early in the boot - * process. But it limits our options here. We must use - * statically allocated structures that already have all - * initializations complete at compile time. We should also - * take care not to overwrite the precious per platform data - * we were given. - */ -int __init rand_initialize(void) -{ - init_std_data(&input_pool); - if (crng_need_final_init) - crng_finalize_init(&primary_crng); - crng_initialize_primary(&primary_crng); - crng_global_init_time = jiffies; - if (ratelimit_disable) { - urandom_warning.interval = 0; - unseeded_warning.interval = 0; - } - return 0; -} - -#ifdef CONFIG_BLOCK -void rand_initialize_disk(struct gendisk *disk) -{ - struct timer_rand_state *state; - - /* - * If kzalloc returns null, we just won't use that entropy - * source. - */ - state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) { - state->last_time = INITIAL_JIFFIES; - disk->random = state; - } -} -#endif - -static ssize_t -urandom_read_nowarn(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) -{ - int ret; - - nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); - ret = extract_crng_user(buf, nbytes); - trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool)); - return ret; -} - -static ssize_t -urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +static void process_oldschool_random_ready_list(void) { unsigned long flags; - static int maxwarn = 10; + struct random_ready_callback *rdy, *tmp; - if (!crng_ready() && maxwarn > 0) { - maxwarn--; - if (__ratelimit(&urandom_warning)) - pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", - current->comm, nbytes); - spin_lock_irqsave(&primary_crng.lock, flags); - crng_init_cnt = 0; - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_lock_irqsave(&random_ready_list_lock, flags); + list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) { + struct module *owner = rdy->owner; + + list_del_init(&rdy->list); + rdy->func(rdy); + module_put(owner); } - - return urandom_read_nowarn(file, buf, nbytes, ppos); + spin_unlock_irqrestore(&random_ready_list_lock, flags); } - -static ssize_t -random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) -{ - int ret; - - ret = wait_for_random_bytes(); - if (ret != 0) - return ret; - return urandom_read_nowarn(file, buf, nbytes, ppos); -} - -static __poll_t -random_poll(struct file *file, poll_table * wait) -{ - __poll_t mask; - - poll_wait(file, &crng_init_wait, wait); - poll_wait(file, &random_write_wait, wait); - mask = 0; - if (crng_ready()) - mask |= EPOLLIN | EPOLLRDNORM; - if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits) - mask |= EPOLLOUT | EPOLLWRNORM; - return mask; -} - -static int -write_pool(struct entropy_store *r, const char __user *buffer, size_t count) -{ - size_t bytes; - __u32 t, buf[16]; - const char __user *p = buffer; - - while (count > 0) { - int b, i = 0; - - bytes = min(count, sizeof(buf)); - if (copy_from_user(&buf, p, bytes)) - return -EFAULT; - - for (b = bytes ; b > 0 ; b -= sizeof(__u32), i++) { - if (!arch_get_random_int(&t)) - break; - buf[i] ^= t; - } - - count -= bytes; - p += bytes; - - mix_pool_bytes(r, buf, bytes); - cond_resched(); - } - - return 0; -} - -static ssize_t random_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - size_t ret; - - ret = write_pool(&input_pool, buffer, count); - if (ret) - return ret; - - return (ssize_t)count; -} - -static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) -{ - int size, ent_count; - int __user *p = (int __user *)arg; - int retval; - - switch (cmd) { - case RNDGETENTCNT: - /* inherently racy, no point locking */ - ent_count = ENTROPY_BITS(&input_pool); - if (put_user(ent_count, p)) - return -EFAULT; - return 0; - case RNDADDTOENTCNT: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (get_user(ent_count, p)) - return -EFAULT; - return credit_entropy_bits_safe(&input_pool, ent_count); - case RNDADDENTROPY: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (get_user(ent_count, p++)) - return -EFAULT; - if (ent_count < 0) - return -EINVAL; - if (get_user(size, p++)) - return -EFAULT; - retval = write_pool(&input_pool, (const char __user *)p, - size); - if (retval < 0) - return retval; - return credit_entropy_bits_safe(&input_pool, ent_count); - case RNDZAPENTCNT: - case RNDCLEARPOOL: - /* - * Clear the entropy pool counters. We no longer clear - * the entropy pool, as that's silly. - */ - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } - return 0; - case RNDRESEEDCRNG: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (crng_init < 2) - return -ENODATA; - crng_reseed(&primary_crng, &input_pool); - WRITE_ONCE(crng_global_init_time, jiffies - 1); - return 0; - default: - return -EINVAL; - } -} - -static int random_fasync(int fd, struct file *filp, int on) -{ - return fasync_helper(fd, filp, on, &fasync); -} - -const struct file_operations random_fops = { - .read = random_read, - .write = random_write, - .poll = random_poll, - .unlocked_ioctl = random_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .fasync = random_fasync, - .llseek = noop_llseek, -}; - -const struct file_operations urandom_fops = { - .read = urandom_read, - .write = random_write, - .unlocked_ioctl = random_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .fasync = random_fasync, - .llseek = noop_llseek, -}; - -SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, - unsigned int, flags) -{ - int ret; - - if (flags & ~(GRND_NONBLOCK|GRND_RANDOM|GRND_INSECURE)) - return -EINVAL; - - /* - * Requesting insecure and blocking randomness at the same time makes - * no sense. - */ - if ((flags & (GRND_INSECURE|GRND_RANDOM)) == (GRND_INSECURE|GRND_RANDOM)) - return -EINVAL; - - if (count > INT_MAX) - count = INT_MAX; - - if (!(flags & GRND_INSECURE) && !crng_ready()) { - if (flags & GRND_NONBLOCK) - return -EAGAIN; - ret = wait_for_random_bytes(); - if (unlikely(ret)) - return ret; - } - return urandom_read_nowarn(NULL, buf, count, NULL); -} - -/******************************************************************** - * - * Sysctl interface - * - ********************************************************************/ - -#ifdef CONFIG_SYSCTL - -#include - -static int min_write_thresh; -static int max_write_thresh = INPUT_POOL_WORDS * 32; -static int random_min_urandom_seed = 60; -static char sysctl_bootid[16]; - -/* - * This function is used to return both the bootid UUID, and random - * UUID. The difference is in whether table->data is NULL; if it is, - * then a new UUID is generated and returned to the user. - * - * If the user accesses this via the proc interface, the UUID will be - * returned as an ASCII string in the standard UUID format; if via the - * sysctl system call, as 16 bytes of binary data. - */ -static int proc_do_uuid(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table fake_table; - unsigned char buf[64], tmp_uuid[16], *uuid; - - uuid = table->data; - if (!uuid) { - uuid = tmp_uuid; - generate_random_uuid(uuid); - } else { - static DEFINE_SPINLOCK(bootid_spinlock); - - spin_lock(&bootid_spinlock); - if (!uuid[8]) - generate_random_uuid(uuid); - spin_unlock(&bootid_spinlock); - } - - sprintf(buf, "%pU", uuid); - - fake_table.data = buf; - fake_table.maxlen = sizeof(buf); - - return proc_dostring(&fake_table, write, buffer, lenp, ppos); -} - -/* - * Return entropy available scaled to integral bits - */ -static int proc_do_entropy(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table fake_table; - int entropy_count; - - entropy_count = *(int *)table->data >> ENTROPY_SHIFT; - - fake_table.data = &entropy_count; - fake_table.maxlen = sizeof(entropy_count); - - return proc_dointvec(&fake_table, write, buffer, lenp, ppos); -} - -static int sysctl_poolsize = INPUT_POOL_WORDS * 32; -extern struct ctl_table random_table[]; -struct ctl_table random_table[] = { - { - .procname = "poolsize", - .data = &sysctl_poolsize, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "entropy_avail", - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_do_entropy, - .data = &input_pool.entropy_count, - }, - { - .procname = "write_wakeup_threshold", - .data = &random_write_wakeup_bits, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_write_thresh, - .extra2 = &max_write_thresh, - }, - { - .procname = "urandom_min_reseed_secs", - .data = &random_min_urandom_seed, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "boot_id", - .data = &sysctl_bootid, - .maxlen = 16, - .mode = 0444, - .proc_handler = proc_do_uuid, - }, - { - .procname = "uuid", - .maxlen = 16, - .mode = 0444, - .proc_handler = proc_do_uuid, - }, -#ifdef ADD_INTERRUPT_BENCH - { - .procname = "add_interrupt_avg_cycles", - .data = &avg_cycles, - .maxlen = sizeof(avg_cycles), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "add_interrupt_avg_deviation", - .data = &avg_deviation, - .maxlen = sizeof(avg_deviation), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, -#endif - { } -}; -#endif /* CONFIG_SYSCTL */ - -struct batched_entropy { - union { - u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)]; - u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)]; - }; - unsigned int position; - spinlock_t batch_lock; -}; - -/* - * Get a random word for internal kernel use only. The quality of the random - * number is good as /dev/urandom, but there is no backtrack protection, with - * the goal of being quite fast and not depleting entropy. In order to ensure - * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once at any - * point prior. - */ -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), -}; - -u64 get_random_u64(void) -{ - u64 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - - warn_unseeded_randomness(&previous); - - batch = raw_cpu_ptr(&batched_entropy_u64); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { - extract_crng((u8 *)batch->entropy_u64); - batch->position = 0; - } - ret = batch->entropy_u64[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u64); - -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), -}; -u32 get_random_u32(void) -{ - u32 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - - warn_unseeded_randomness(&previous); - - batch = raw_cpu_ptr(&batched_entropy_u32); - spin_lock_irqsave(&batch->batch_lock, flags); - if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { - extract_crng((u8 *)batch->entropy_u32); - batch->position = 0; - } - ret = batch->entropy_u32[batch->position++]; - spin_unlock_irqrestore(&batch->batch_lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u32); - -/* It's important to invalidate all potential batched entropy that might - * be stored before the crng is initialized, which we can do lazily by - * simply resetting the counter to zero so that it's re-extracted on the - * next usage. */ -static void invalidate_batched_entropy(void) -{ - int cpu; - unsigned long flags; - - for_each_possible_cpu (cpu) { - struct batched_entropy *batched_entropy; - - batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); - spin_lock_irqsave(&batched_entropy->batch_lock, flags); - batched_entropy->position = 0; - spin_unlock(&batched_entropy->batch_lock); - - batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu); - spin_lock(&batched_entropy->batch_lock); - batched_entropy->position = 0; - spin_unlock_irqrestore(&batched_entropy->batch_lock, flags); - } -} - -/** - * randomize_page - Generate a random, page aligned address - * @start: The smallest acceptable address the caller will take. - * @range: The size of the area, starting at @start, within which the - * random address must fall. - * - * If @start + @range would overflow, @range is capped. - * - * NOTE: Historical use of randomize_range, which this replaces, presumed that - * @start was already page aligned. We now align it regardless. - * - * Return: A page aligned address within [start, start + range). On error, - * @start is returned. - */ -unsigned long -randomize_page(unsigned long start, unsigned long range) -{ - if (!PAGE_ALIGNED(start)) { - range -= PAGE_ALIGN(start) - start; - start = PAGE_ALIGN(start); - } - - if (start > ULONG_MAX - range) - range = ULONG_MAX - start; - - range >>= PAGE_SHIFT; - - if (range == 0) - return start; - - return start + (get_random_long() % range << PAGE_SHIFT); -} - -/* Interface for in-kernel drivers of true hardware RNGs. - * Those devices may produce endless random bits and will be throttled - * when our pool is full. - */ -void add_hwgenerator_randomness(const char *buffer, size_t count, - size_t entropy) -{ - struct entropy_store *poolp = &input_pool; - - if (unlikely(crng_init == 0)) { - size_t ret = crng_fast_load(buffer, count); - count -= ret; - buffer += ret; - if (!count || crng_init == 0) - return; - } - - /* Suspend writing if we're above the trickle threshold. - * We'll be woken up again once below random_write_wakeup_thresh, - * or when the calling thread is about to terminate. - */ - wait_event_interruptible(random_write_wait, - !system_wq || kthread_should_stop() || - ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); - mix_pool_bytes(poolp, buffer, count); - credit_entropy_bits(poolp, entropy); -} -EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); - -/* Handle random seed passed by bootloader. - * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise - * it would be regarded as device data. - * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. - */ -void add_bootloader_randomness(const void *buf, unsigned int size) -{ - if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) - add_hwgenerator_randomness(buf, size, size * 8); - else - add_device_randomness(buf, size); -} -EXPORT_SYMBOL_GPL(add_bootloader_randomness); diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index c84d23951219..d0e11d7a3c08 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -400,7 +400,16 @@ ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value, if (!rc) { out = (struct tpm2_get_cap_out *) &buf.data[TPM_HEADER_SIZE]; - *value = be32_to_cpu(out->value); + /* + * To prevent failing boot up of some systems, Infineon TPM2.0 + * returns SUCCESS on TPM2_Startup in field upgrade mode. Also + * the TPM2_Getcapability command returns a zero length list + * in field upgrade mode. + */ + if (be32_to_cpu(out->property_cnt) > 0) + *value = be32_to_cpu(out->value); + else + rc = -ENODATA; } tpm_buf_destroy(&buf); return rc; diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 3ca7528322f5..a1ec722d62a7 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -683,6 +683,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, if (!wait_event_timeout(ibmvtpm->crq_queue.wq, ibmvtpm->rtce_buf != NULL, HZ)) { + rc = -ENODEV; dev_err(dev, "CRQ response timed out\n"); goto init_irq_cleanup; } diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index b656d25a9767..fe772baeb15f 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -106,6 +106,10 @@ static void clk_generated_best_diff(struct clk_rate_request *req, tmp_rate = parent_rate; else tmp_rate = parent_rate / div; + + if (tmp_rate < req->min_rate || tmp_rate > req->max_rate) + return; + tmp_diff = abs(req->rate - tmp_rate); if (*best_diff < 0 || *best_diff >= tmp_diff) { diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c index 0391f5bda5e4..36e8d619e334 100644 --- a/drivers/clk/imx/clk-imx8mp.c +++ b/drivers/clk/imx/clk-imx8mp.c @@ -691,7 +691,7 @@ static int imx8mp_clocks_probe(struct platform_device *pdev) hws[IMX8MP_CLK_UART2_ROOT] = imx_clk_hw_gate4("uart2_root_clk", "uart2", ccm_base + 0x44a0, 0); hws[IMX8MP_CLK_UART3_ROOT] = imx_clk_hw_gate4("uart3_root_clk", "uart3", ccm_base + 0x44b0, 0); hws[IMX8MP_CLK_UART4_ROOT] = imx_clk_hw_gate4("uart4_root_clk", "uart4", ccm_base + 0x44c0, 0); - hws[IMX8MP_CLK_USB_ROOT] = imx_clk_hw_gate4("usb_root_clk", "osc_32k", ccm_base + 0x44d0, 0); + hws[IMX8MP_CLK_USB_ROOT] = imx_clk_hw_gate4("usb_root_clk", "hsio_axi", ccm_base + 0x44d0, 0); hws[IMX8MP_CLK_USB_PHY_ROOT] = imx_clk_hw_gate4("usb_phy_root_clk", "usb_phy_ref", ccm_base + 0x44f0, 0); hws[IMX8MP_CLK_USDHC1_ROOT] = imx_clk_hw_gate4("usdhc1_root_clk", "usdhc1", ccm_base + 0x4510, 0); hws[IMX8MP_CLK_USDHC2_ROOT] = imx_clk_hw_gate4("usdhc2_root_clk", "usdhc2", ccm_base + 0x4520, 0); diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index ba04cb381cd3..7c617d8dff3f 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -472,4 +472,3 @@ void __init hv_init_clocksource(void) hv_sched_clock_offset = hv_read_reference_counter(); hv_setup_sched_clock(read_hv_sched_clock_msr); } -EXPORT_SYMBOL_GPL(hv_init_clocksource); diff --git a/drivers/clocksource/timer-ixp4xx.c b/drivers/clocksource/timer-ixp4xx.c index 9396745e1c17..ad904bbbac6f 100644 --- a/drivers/clocksource/timer-ixp4xx.c +++ b/drivers/clocksource/timer-ixp4xx.c @@ -258,7 +258,6 @@ void __init ixp4xx_timer_setup(resource_size_t timerbase, } ixp4xx_timer_register(base, timer_irq, timer_freq); } -EXPORT_SYMBOL_GPL(ixp4xx_timer_setup); #ifdef CONFIG_OF static __init int ixp4xx_of_timer_init(struct device_node *np) diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c index 56c0cc32d0ac..d514b44e67dd 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -236,7 +236,7 @@ static int __init oxnas_rps_timer_init(struct device_node *np) } rps->irq = irq_of_parse_and_map(np, 0); - if (rps->irq < 0) { + if (!rps->irq) { ret = -EINVAL; goto err_iomap; } diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index c51c5ed15aa7..0e7748df4be3 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -32,7 +32,7 @@ static int riscv_clock_next_event(unsigned long delta, static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, .rating = 100, .set_next_event = riscv_clock_next_event, }; diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c index 6e8ad4a4ea3c..bedd3570474b 100644 --- a/drivers/clocksource/timer-sp804.c +++ b/drivers/clocksource/timer-sp804.c @@ -274,6 +274,11 @@ static int __init sp804_of_init(struct device_node *np, struct sp804_timer *time struct clk *clk1, *clk2; const char *name = of_get_property(np, "compatible", NULL); + if (initialized) { + pr_debug("%pOF: skipping further SP804 timer device\n", np); + return 0; + } + base = of_iomap(np, 0); if (!base) return -ENXIO; @@ -285,11 +290,6 @@ static int __init sp804_of_init(struct device_node *np, struct sp804_timer *time writel(0, timer1_base + timer->ctrl); writel(0, timer2_base + timer->ctrl); - if (initialized || !of_device_is_available(np)) { - ret = -EINVAL; - goto err; - } - clk1 = of_clk_get(np, 0); if (IS_ERR(clk1)) clk1 = NULL; diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index a310372dc53e..82f6592bbadb 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -44,6 +44,8 @@ struct mtk_cpu_dvfs_info { bool need_voltage_tracking; }; +static struct platform_device *cpufreq_pdev; + static LIST_HEAD(dvfs_info_list); static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu) @@ -546,7 +548,6 @@ static int __init mtk_cpufreq_driver_init(void) { struct device_node *np; const struct of_device_id *match; - struct platform_device *pdev; int err; np = of_find_node_by_path("/"); @@ -570,15 +571,23 @@ static int __init mtk_cpufreq_driver_init(void) * and the device registration codes are put here to handle defer * probing. */ - pdev = platform_device_register_simple("mtk-cpufreq", -1, NULL, 0); - if (IS_ERR(pdev)) { + cpufreq_pdev = platform_device_register_simple("mtk-cpufreq", -1, NULL, 0); + if (IS_ERR(cpufreq_pdev)) { pr_err("failed to register mtk-cpufreq platform device\n"); - return PTR_ERR(pdev); + platform_driver_unregister(&mtk_cpufreq_platdrv); + return PTR_ERR(cpufreq_pdev); } return 0; } -device_initcall(mtk_cpufreq_driver_init); +module_init(mtk_cpufreq_driver_init) + +static void __exit mtk_cpufreq_driver_exit(void) +{ + platform_device_unregister(cpufreq_pdev); + platform_driver_unregister(&mtk_cpufreq_platdrv); +} +module_exit(mtk_cpufreq_driver_exit) MODULE_DESCRIPTION("MediaTek CPUFreq driver"); MODULE_AUTHOR("Pi-Cheng Chen "); diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 73621bc11976..3704476bb83a 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -471,6 +471,10 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) if (slew_done_gpio_np) slew_done_gpio = read_gpio(slew_done_gpio_np); + of_node_put(volt_gpio_np); + of_node_put(freq_gpio_np); + of_node_put(slew_done_gpio_np); + /* If we use the frequency GPIOs, calculate the min/max speeds based * on the bus frequencies */ diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 6b6b20da2bcf..573b417e1483 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -275,6 +275,7 @@ static int qoriq_cpufreq_probe(struct platform_device *pdev) np = of_find_matching_node(NULL, qoriq_cpufreq_blacklist); if (np) { + of_node_put(np); dev_info(&pdev->dev, "Disabling due to erratum A-008083"); return -ENODEV; } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index f783748462f9..7b3be3dc2210 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -93,6 +93,68 @@ static int sun8i_ss_cipher_fallback(struct skcipher_request *areq) return err; } +static int sun8i_ss_setup_ivs(struct skcipher_request *areq) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sun8i_ss_dev *ss = op->ss; + struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq); + struct scatterlist *sg = areq->src; + unsigned int todo, offset; + unsigned int len = areq->cryptlen; + unsigned int ivsize = crypto_skcipher_ivsize(tfm); + struct sun8i_ss_flow *sf = &ss->flows[rctx->flow]; + int i = 0; + u32 a; + int err; + + rctx->ivlen = ivsize; + if (rctx->op_dir & SS_DECRYPTION) { + offset = areq->cryptlen - ivsize; + scatterwalk_map_and_copy(sf->biv, areq->src, offset, + ivsize, 0); + } + + /* we need to copy all IVs from source in case DMA is bi-directionnal */ + while (sg && len) { + if (sg_dma_len(sg) == 0) { + sg = sg_next(sg); + continue; + } + if (i == 0) + memcpy(sf->iv[0], areq->iv, ivsize); + a = dma_map_single(ss->dev, sf->iv[i], ivsize, DMA_TO_DEVICE); + if (dma_mapping_error(ss->dev, a)) { + memzero_explicit(sf->iv[i], ivsize); + dev_err(ss->dev, "Cannot DMA MAP IV\n"); + err = -EFAULT; + goto dma_iv_error; + } + rctx->p_iv[i] = a; + /* we need to setup all others IVs only in the decrypt way */ + if (rctx->op_dir & SS_ENCRYPTION) + return 0; + todo = min(len, sg_dma_len(sg)); + len -= todo; + i++; + if (i < MAX_SG) { + offset = sg->length - ivsize; + scatterwalk_map_and_copy(sf->iv[i], sg, offset, ivsize, 0); + } + rctx->niv = i; + sg = sg_next(sg); + } + + return 0; +dma_iv_error: + i--; + while (i >= 0) { + dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE); + memzero_explicit(sf->iv[i], ivsize); + } + return err; +} + static int sun8i_ss_cipher(struct skcipher_request *areq) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); @@ -101,9 +163,9 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct sun8i_ss_alg_template *algt; + struct sun8i_ss_flow *sf = &ss->flows[rctx->flow]; struct scatterlist *sg; unsigned int todo, len, offset, ivsize; - void *backup_iv = NULL; int nr_sgs = 0; int nr_sgd = 0; int err = 0; @@ -134,30 +196,9 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) ivsize = crypto_skcipher_ivsize(tfm); if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { - rctx->ivlen = ivsize; - rctx->biv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA); - if (!rctx->biv) { - err = -ENOMEM; + err = sun8i_ss_setup_ivs(areq); + if (err) goto theend_key; - } - if (rctx->op_dir & SS_DECRYPTION) { - backup_iv = kzalloc(ivsize, GFP_KERNEL); - if (!backup_iv) { - err = -ENOMEM; - goto theend_key; - } - offset = areq->cryptlen - ivsize; - scatterwalk_map_and_copy(backup_iv, areq->src, offset, - ivsize, 0); - } - memcpy(rctx->biv, areq->iv, ivsize); - rctx->p_iv = dma_map_single(ss->dev, rctx->biv, rctx->ivlen, - DMA_TO_DEVICE); - if (dma_mapping_error(ss->dev, rctx->p_iv)) { - dev_err(ss->dev, "Cannot DMA MAP IV\n"); - err = -ENOMEM; - goto theend_iv; - } } if (areq->src == areq->dst) { nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src), @@ -240,21 +281,19 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) } theend_iv: - if (rctx->p_iv) - dma_unmap_single(ss->dev, rctx->p_iv, rctx->ivlen, - DMA_TO_DEVICE); - if (areq->iv && ivsize > 0) { - if (rctx->biv) { - offset = areq->cryptlen - ivsize; - if (rctx->op_dir & SS_DECRYPTION) { - memcpy(areq->iv, backup_iv, ivsize); - kfree_sensitive(backup_iv); - } else { - scatterwalk_map_and_copy(areq->iv, areq->dst, offset, - ivsize, 0); - } - kfree(rctx->biv); + for (i = 0; i < rctx->niv; i++) { + dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE); + memzero_explicit(sf->iv[i], ivsize); + } + + offset = areq->cryptlen - ivsize; + if (rctx->op_dir & SS_DECRYPTION) { + memcpy(areq->iv, sf->biv, ivsize); + memzero_explicit(sf->biv, ivsize); + } else { + scatterwalk_map_and_copy(areq->iv, areq->dst, offset, + ivsize, 0); } } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 319fe3279a71..657530578643 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -66,6 +66,7 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx const char *name) { int flow = rctx->flow; + unsigned int ivlen = rctx->ivlen; u32 v = SS_START; int i; @@ -104,15 +105,14 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx mutex_lock(&ss->mlock); writel(rctx->p_key, ss->base + SS_KEY_ADR_REG); - if (i == 0) { - if (rctx->p_iv) - writel(rctx->p_iv, ss->base + SS_IV_ADR_REG); - } else { - if (rctx->biv) { - if (rctx->op_dir == SS_ENCRYPTION) - writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG); + if (ivlen) { + if (rctx->op_dir == SS_ENCRYPTION) { + if (i == 0) + writel(rctx->p_iv[0], ss->base + SS_IV_ADR_REG); else - writel(rctx->t_src[i - 1].addr + rctx->t_src[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG); + writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - ivlen, ss->base + SS_IV_ADR_REG); + } else { + writel(rctx->p_iv[i], ss->base + SS_IV_ADR_REG); } } @@ -464,7 +464,7 @@ static void sun8i_ss_free_flows(struct sun8i_ss_dev *ss, int i) */ static int allocate_flows(struct sun8i_ss_dev *ss) { - int i, err; + int i, j, err; ss->flows = devm_kcalloc(ss->dev, MAXFLOW, sizeof(struct sun8i_ss_flow), GFP_KERNEL); @@ -474,6 +474,18 @@ static int allocate_flows(struct sun8i_ss_dev *ss) for (i = 0; i < MAXFLOW; i++) { init_completion(&ss->flows[i].complete); + ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, + GFP_KERNEL | GFP_DMA); + if (!ss->flows[i].biv) + goto error_engine; + + for (j = 0; j < MAX_SG; j++) { + ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, + GFP_KERNEL | GFP_DMA); + if (!ss->flows[i].iv[j]) + goto error_engine; + } + ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true); if (!ss->flows[i].engine) { dev_err(ss->dev, "Cannot allocate engine\n"); diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index c9edecd43ef9..55d652cd468b 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -379,13 +379,21 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) } len = areq->nbytes; - for_each_sg(areq->src, sg, nr_sgs, i) { + sg = areq->src; + i = 0; + while (len > 0 && sg) { + if (sg_dma_len(sg) == 0) { + sg = sg_next(sg); + continue; + } rctx->t_src[i].addr = sg_dma_address(sg); todo = min(len, sg_dma_len(sg)); rctx->t_src[i].len = todo / 4; len -= todo; rctx->t_dst[i].addr = addr_res; rctx->t_dst[i].len = digestsize / 4; + sg = sg_next(sg); + i++; } if (len > 0) { dev_err(ss->dev, "remaining len %d\n", len); diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index 1a66457f4a20..49147195ecf6 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -120,11 +120,15 @@ struct sginfo { * @complete: completion for the current task on this flow * @status: set to 1 by interrupt if task is done * @stat_req: number of request done by this flow + * @iv: list of IV to use for each step + * @biv: buffer which contain the backuped IV */ struct sun8i_ss_flow { struct crypto_engine *engine; struct completion complete; int status; + u8 *iv[MAX_SG]; + u8 *biv; #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG unsigned long stat_req; #endif @@ -163,28 +167,28 @@ struct sun8i_ss_dev { * @t_src: list of mapped SGs with their size * @t_dst: list of mapped SGs with their size * @p_key: DMA address of the key - * @p_iv: DMA address of the IV + * @p_iv: DMA address of the IVs + * @niv: Number of IVs DMA mapped * @method: current algorithm for this request * @op_mode: op_mode for this request * @op_dir: direction (encrypt vs decrypt) for this request * @flow: the flow to use for this request - * @ivlen: size of biv + * @ivlen: size of IVs * @keylen: keylen for this request - * @biv: buffer which contain the IV * @fallback_req: request struct for invoking the fallback skcipher TFM */ struct sun8i_cipher_req_ctx { struct sginfo t_src[MAX_SG]; struct sginfo t_dst[MAX_SG]; u32 p_key; - u32 p_iv; + u32 p_iv[MAX_SG]; + int niv; u32 method; u32 op_mode; u32 op_dir; int flow; unsigned int ivlen; unsigned int keylen; - void *biv; struct skcipher_request fallback_req; // keep at the end }; diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index ca0361b2dbb0..f87aa2169e5f 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -609,6 +609,13 @@ static bool check_version(struct fsl_mc_version *mc_version, u32 major, } #endif +static bool needs_entropy_delay_adjustment(void) +{ + if (of_machine_is_compatible("fsl,imx6sx")) + return true; + return false; +} + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -855,6 +862,8 @@ static int caam_probe(struct platform_device *pdev) * Also, if a handle was instantiated, do not change * the TRNG parameters. */ + if (needs_entropy_delay_adjustment()) + ent_delay = 12000; if (!(ctrlpriv->rng4_sh_init || inst_handles)) { dev_info(dev, "Entropy delay = %u\n", @@ -871,6 +880,15 @@ static int caam_probe(struct platform_device *pdev) */ ret = instantiate_rng(dev, inst_handles, gen_sk); + /* + * Entropy delay is determined via TRNG characterization. + * TRNG characterization is run across different voltages + * and temperatures. + * If worst case value for ent_dly is identified, + * the loop can be skipped for that platform. + */ + if (needs_entropy_delay_adjustment()) + break; if (ret == -EAGAIN) /* * if here, the loop will rerun, diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 11e0278c8631..6140e4927322 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -356,12 +356,14 @@ void cc_unmap_cipher_request(struct device *dev, void *ctx, req_ctx->mlli_params.mlli_dma_addr); } - dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL); - dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); - if (src != dst) { - dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_TO_DEVICE); + dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_FROM_DEVICE); dev_dbg(dev, "Unmapped req->dst=%pK\n", sg_virt(dst)); + dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); + } else { + dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL); + dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); } } @@ -377,6 +379,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, u32 dummy = 0; int rc = 0; u32 mapped_nents = 0; + int src_direction = (src != dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); req_ctx->dma_buf_type = CC_DMA_BUF_DLLI; mlli_params->curr_pool = NULL; @@ -399,7 +402,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } /* Map the src SGL */ - rc = cc_map_sg(dev, src, nbytes, DMA_BIDIRECTIONAL, &req_ctx->in_nents, + rc = cc_map_sg(dev, src, nbytes, src_direction, &req_ctx->in_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents); if (rc) goto cipher_exit; @@ -416,7 +419,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } } else { /* Map the dst sg */ - rc = cc_map_sg(dev, dst, nbytes, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, dst, nbytes, DMA_FROM_DEVICE, &req_ctx->out_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents); if (rc) @@ -456,6 +459,7 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) struct aead_req_ctx *areq_ctx = aead_request_ctx(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct cc_drvdata *drvdata = dev_get_drvdata(dev); + int src_direction = (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); if (areq_ctx->mac_buf_dma_addr) { dma_unmap_single(dev, areq_ctx->mac_buf_dma_addr, @@ -514,13 +518,11 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents, areq_ctx->assoclen, req->cryptlen); - dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, - DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, src_direction); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); - dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, - DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_FROM_DEVICE); } if (drvdata->coherent && areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT && @@ -843,7 +845,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, else size_for_map -= authsize; - rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, req->dst, size_for_map, DMA_FROM_DEVICE, &areq_ctx->dst.mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes, &dst_mapped_nents); @@ -1056,7 +1058,8 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) size_to_map += authsize; } - rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, req->src, size_to_map, + (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL), &areq_ctx->src.mapped_nents, (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES + LLI_MAX_NUM_OF_DATA_ENTRIES), diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index b4a6ff9dd6d5..596a8c74e40a 100644 --- a/drivers/crypto/marvell/cesa/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -614,7 +614,6 @@ struct skcipher_alg mv_cesa_ecb_des3_ede_alg = { .decrypt = mv_cesa_ecb_des3_ede_decrypt, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, .base = { .cra_name = "ecb(des3_ede)", .cra_driver_name = "mv-ecb-des3-ede", diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c index 13c65deda8e9..8a4f10bb3fcd 100644 --- a/drivers/crypto/nx/nx-common-powernv.c +++ b/drivers/crypto/nx/nx-common-powernv.c @@ -827,7 +827,7 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, goto err_out; vas_init_rx_win_attr(&rxattr, coproc->ct); - rxattr.rx_fifo = (void *)rx_fifo; + rxattr.rx_fifo = rx_fifo; rxattr.rx_fifo_size = fifo_size; rxattr.lnotify_lpid = lpid; rxattr.lnotify_pid = pid; diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c index 11f30fd48c14..031b5f701a0a 100644 --- a/drivers/crypto/qcom-rng.c +++ b/drivers/crypto/qcom-rng.c @@ -65,6 +65,7 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) } else { /* copy only remaining bytes */ memcpy(data, &val, max - currsize); + break; } } while (currsize < max); diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index be1bf39a317d..90a920e7f664 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -384,8 +384,10 @@ static int stm32_crc_remove(struct platform_device *pdev) struct stm32_crc *crc = platform_get_drvdata(pdev); int ret = pm_runtime_get_sync(crc->dev); - if (ret < 0) + if (ret < 0) { + pm_runtime_put_noidle(crc->dev); return ret; + } spin_lock(&crc_list.lock); list_del(&crc->list); diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 17ed980d9099..d6da9c3e3106 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -514,15 +514,19 @@ static int of_get_devfreq_events(struct device_node *np, count = of_get_child_count(events_np); desc = devm_kcalloc(dev, count, sizeof(*desc), GFP_KERNEL); - if (!desc) + if (!desc) { + of_node_put(events_np); return -ENOMEM; + } info->num_events = count; of_id = of_match_device(exynos_ppmu_id_match, dev); if (of_id) info->ppmu_type = (enum exynos_ppmu_type)of_id->data; - else + else { + of_node_put(events_np); return -EINVAL; + } j = 0; for_each_child_of_node(events_np, node) { diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 2e912166a993..7e52375d9818 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -485,6 +485,8 @@ static int rk3399_dmcfreq_remove(struct platform_device *pdev) { struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev); + devfreq_event_disable_edev(dmcfreq->edev); + /* * Before remove the opp table we need to unregister the opp notifier. */ diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index cfbf10128aae..2e3b76519b49 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -26,8 +26,11 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct udmabuf *ubuf = vma->vm_private_data; + pgoff_t pgoff = vmf->pgoff; - vmf->page = ubuf->pages[vmf->pgoff]; + if (pgoff >= ubuf->pagecount) + return VM_FAULT_SIGBUS; + vmf->page = ubuf->pages[pgoff]; get_page(vmf->page); return 0; } diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 47552db6b8dc..b5d691ae45dc 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1838,6 +1838,11 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < init_nr_desc_per_channel; i++) { desc = at_xdmac_alloc_desc(chan, GFP_KERNEL); if (!desc) { + if (i == 0) { + dev_warn(chan2dev(chan), + "can't allocate any descriptors\n"); + return -EIO; + } dev_warn(chan2dev(chan), "only %d descriptors have been allocated\n", i); break; diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 4da88578ed64..ae65eb90afab 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -266,10 +266,16 @@ int idxd_cdev_register(void) rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK, ictx[i].name); if (rc) - return rc; + goto err_free_chrdev_region; } return 0; + +err_free_chrdev_region: + for (i--; i >= 0; i--) + unregister_chrdev_region(ictx[i].devt, MINORMASK); + + return rc; } void idxd_cdev_remove(void) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index aa7435555de9..09ad37bbd98b 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -82,6 +82,27 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq, hw->int_handle = wq->vec_ptr; } +static struct dma_async_tx_descriptor * +idxd_dma_prep_interrupt(struct dma_chan *c, unsigned long flags) +{ + struct idxd_wq *wq = to_idxd_wq(c); + u32 desc_flags; + struct idxd_desc *desc; + + if (wq->state != IDXD_WQ_ENABLED) + return NULL; + + op_flag_setup(flags, &desc_flags); + desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(desc)) + return NULL; + + idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_NOOP, + 0, 0, 0, desc->compl_dma, desc_flags); + desc->txd.flags = flags; + return &desc->txd; +} + static struct dma_async_tx_descriptor * idxd_dma_submit_memcpy(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) @@ -188,10 +209,12 @@ int idxd_register_dma_device(struct idxd_device *idxd) INIT_LIST_HEAD(&dma->channels); dma->dev = dev; + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); dma_cap_set(DMA_PRIVATE, dma->cap_mask); dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask); dma->device_release = idxd_dma_release; + dma->device_prep_dma_interrupt = idxd_dma_prep_interrupt; if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) { dma_cap_set(DMA_MEMCPY, dma->cap_mask); dma->device_prep_dma_memcpy = idxd_dma_submit_memcpy; diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 792c91cd1608..2283dcd8bf91 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -2212,7 +2212,7 @@ MODULE_DESCRIPTION("i.MX SDMA driver"); #if IS_ENABLED(CONFIG_SOC_IMX6Q) MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin"); #endif -#if IS_ENABLED(CONFIG_SOC_IMX7D) +#if IS_ENABLED(CONFIG_SOC_IMX7D) || IS_ENABLED(CONFIG_SOC_IMX8M) MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin"); #endif MODULE_LICENSE("GPL"); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 6dca548f4dab..5bbae99f2d34 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2591,7 +2591,7 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) /* If the DMAC pool is empty, alloc new */ if (!desc) { - DEFINE_SPINLOCK(lock); + static DEFINE_SPINLOCK(lock); LIST_HEAD(pool); if (!add_desc(&pool, &lock, GFP_ATOMIC, 1)) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index fe36738f2dd7..9d54746c422c 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -40,7 +40,6 @@ STM32_MDMA_SHIFT(mask)) #define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */ -#define STM32_MDMA_GISR1 0x0004 /* MDMA Int Status Reg 2 */ /* MDMA Channel x interrupt/status register */ #define STM32_MDMA_CISR(x) (0x40 + 0x40 * (x)) /* x = 0..62 */ @@ -196,7 +195,7 @@ #define STM32_MDMA_MAX_BUF_LEN 128 #define STM32_MDMA_MAX_BLOCK_LEN 65536 -#define STM32_MDMA_MAX_CHANNELS 63 +#define STM32_MDMA_MAX_CHANNELS 32 #define STM32_MDMA_MAX_REQUESTS 256 #define STM32_MDMA_MAX_BURST 128 #define STM32_MDMA_VERY_HIGH_PRIORITY 0x11 @@ -1345,90 +1344,84 @@ static void stm32_mdma_xfer_end(struct stm32_mdma_chan *chan) static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid) { struct stm32_mdma_device *dmadev = devid; - struct stm32_mdma_chan *chan = devid; - u32 reg, id, ien, status, flag; + struct stm32_mdma_chan *chan; + u32 reg, id, ccr, ien, status; /* Find out which channel generates the interrupt */ status = readl_relaxed(dmadev->base + STM32_MDMA_GISR0); - if (status) { - id = __ffs(status); - } else { - status = readl_relaxed(dmadev->base + STM32_MDMA_GISR1); - if (!status) { - dev_dbg(mdma2dev(dmadev), "spurious it\n"); - return IRQ_NONE; - } - id = __ffs(status); - /* - * As GISR0 provides status for channel id from 0 to 31, - * so GISR1 provides status for channel id from 32 to 62 - */ - id += 32; + if (!status) { + dev_dbg(mdma2dev(dmadev), "spurious it\n"); + return IRQ_NONE; } + id = __ffs(status); chan = &dmadev->chan[id]; if (!chan) { - dev_dbg(mdma2dev(dmadev), "MDMA channel not initialized\n"); - goto exit; + dev_warn(mdma2dev(dmadev), "MDMA channel not initialized\n"); + return IRQ_NONE; } /* Handle interrupt for the channel */ spin_lock(&chan->vchan.lock); - status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id)); - ien = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)); - ien &= STM32_MDMA_CCR_IRQ_MASK; - ien >>= 1; + status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(id)); + /* Mask Channel ReQuest Active bit which can be set in case of MEM2MEM */ + status &= ~STM32_MDMA_CISR_CRQA; + ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(id)); + ien = (ccr & STM32_MDMA_CCR_IRQ_MASK) >> 1; if (!(status & ien)) { spin_unlock(&chan->vchan.lock); - dev_dbg(chan2dev(chan), - "spurious it (status=0x%04x, ien=0x%04x)\n", - status, ien); + dev_warn(chan2dev(chan), + "spurious it (status=0x%04x, ien=0x%04x)\n", + status, ien); return IRQ_NONE; } - flag = __ffs(status & ien); - reg = STM32_MDMA_CIFCR(chan->id); + reg = STM32_MDMA_CIFCR(id); - switch (1 << flag) { - case STM32_MDMA_CISR_TEIF: - id = chan->id; - status = readl_relaxed(dmadev->base + STM32_MDMA_CESR(id)); - dev_err(chan2dev(chan), "Transfer Err: stat=0x%08x\n", status); + if (status & STM32_MDMA_CISR_TEIF) { + dev_err(chan2dev(chan), "Transfer Err: stat=0x%08x\n", + readl_relaxed(dmadev->base + STM32_MDMA_CESR(id))); stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CTEIF); - break; + status &= ~STM32_MDMA_CISR_TEIF; + } - case STM32_MDMA_CISR_CTCIF: + if (status & STM32_MDMA_CISR_CTCIF) { stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CCTCIF); + status &= ~STM32_MDMA_CISR_CTCIF; stm32_mdma_xfer_end(chan); - break; + } - case STM32_MDMA_CISR_BRTIF: + if (status & STM32_MDMA_CISR_BRTIF) { stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBRTIF); - break; + status &= ~STM32_MDMA_CISR_BRTIF; + } - case STM32_MDMA_CISR_BTIF: + if (status & STM32_MDMA_CISR_BTIF) { stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBTIF); + status &= ~STM32_MDMA_CISR_BTIF; chan->curr_hwdesc++; if (chan->desc && chan->desc->cyclic) { if (chan->curr_hwdesc == chan->desc->count) chan->curr_hwdesc = 0; vchan_cyclic_callback(&chan->desc->vdesc); } - break; + } - case STM32_MDMA_CISR_TCIF: + if (status & STM32_MDMA_CISR_TCIF) { stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CLTCIF); - break; + status &= ~STM32_MDMA_CISR_TCIF; + } - default: - dev_err(chan2dev(chan), "it %d unhandled (status=0x%04x)\n", - 1 << flag, status); + if (status) { + stm32_mdma_set_bits(dmadev, reg, status); + dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status); + if (!(ccr & STM32_MDMA_CCR_EN)) + dev_err(chan2dev(chan), "chan disabled by HW\n"); } spin_unlock(&chan->vchan.lock); -exit: return IRQ_HANDLED; } diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c index 4ba8fa5d9c36..86ced7f2d771 100644 --- a/drivers/dma/ti/dma-crossbar.c +++ b/drivers/dma/ti/dma-crossbar.c @@ -245,6 +245,7 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, if (dma_spec->args[0] >= xbar->xbar_requests) { dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", dma_spec->args[0]); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } @@ -252,12 +253,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); if (!dma_spec->np) { dev_err(&pdev->dev, "Can't get DMA master\n"); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } @@ -268,6 +271,8 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, mutex_unlock(&xbar->mutex); dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); + of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index 5fecf5aa6e85..7e6be076e9d3 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -232,7 +232,7 @@ struct zynqmp_dma_chan { bool is_dmacoherent; struct tasklet_struct tasklet; bool idle; - u32 desc_size; + size_t desc_size; bool err; u32 bus_width; u32 src_burst_len; @@ -490,7 +490,8 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan) } chan->desc_pool_v = dma_alloc_coherent(chan->dev, - (2 * chan->desc_size * ZYNQMP_DMA_NUM_DESCS), + (2 * ZYNQMP_DMA_DESC_SIZE(chan) * + ZYNQMP_DMA_NUM_DESCS), &chan->desc_pool_p, GFP_KERNEL); if (!chan->desc_pool_v) return -ENOMEM; diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c index b8a7d9594afd..1fa5ca57e9ec 100644 --- a/drivers/edac/dmc520_edac.c +++ b/drivers/edac/dmc520_edac.c @@ -489,7 +489,7 @@ static int dmc520_edac_probe(struct platform_device *pdev) dev = &pdev->dev; for (idx = 0; idx < NUMBER_OF_IRQS; idx++) { - irq = platform_get_irq_byname(pdev, dmc520_irq_configs[idx].name); + irq = platform_get_irq_byname_optional(pdev, dmc520_irq_configs[idx].name); irqs[idx] = irq; masks[idx] = dmc520_irq_configs[idx].mask; if (irq >= 0) { diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index a918ca93e4f7..df5897c90bec 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -101,9 +101,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle) dmi_memdev_name(handle, &bank, &device); - /* both strings must be non-zero */ - if (bank && *bank && device && *device) - snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device); + /* + * Set to a NULL string when both bank and device are zero. In this case, + * the label assigned by default will be preserved. + */ + snprintf(dimm->label, sizeof(dimm->label), "%s%s%s", + (bank && *bank) ? bank : "", + (bank && *bank && device && *device) ? " " : "", + (device && *device) ? device : ""); } static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry) diff --git a/drivers/extcon/extcon-ptn5150.c b/drivers/extcon/extcon-ptn5150.c index 5b9a3cf8df26..2a7874108df8 100644 --- a/drivers/extcon/extcon-ptn5150.c +++ b/drivers/extcon/extcon-ptn5150.c @@ -194,6 +194,13 @@ static int ptn5150_init_dev_type(struct ptn5150_info *info) return 0; } +static void ptn5150_work_sync_and_put(void *data) +{ + struct ptn5150_info *info = data; + + cancel_work_sync(&info->irq_work); +} + static int ptn5150_i2c_probe(struct i2c_client *i2c) { struct device *dev = &i2c->dev; @@ -284,6 +291,10 @@ static int ptn5150_i2c_probe(struct i2c_client *i2c) if (ret) return -EINVAL; + ret = devm_add_action_or_reset(dev, ptn5150_work_sync_and_put, info); + if (ret) + return ret; + /* * Update current extcon state if for example OTG connection was there * before the probe diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index e7a9561a826d..356610404bb4 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -1230,19 +1230,14 @@ int extcon_dev_register(struct extcon_dev *edev) edev->dev.type = &edev->extcon_dev_type; } - ret = device_register(&edev->dev); - if (ret) { - put_device(&edev->dev); - goto err_dev; - } - spin_lock_init(&edev->lock); - edev->nh = devm_kcalloc(&edev->dev, edev->max_supported, - sizeof(*edev->nh), GFP_KERNEL); - if (!edev->nh) { - ret = -ENOMEM; - device_unregister(&edev->dev); - goto err_dev; + if (edev->max_supported) { + edev->nh = kcalloc(edev->max_supported, sizeof(*edev->nh), + GFP_KERNEL); + if (!edev->nh) { + ret = -ENOMEM; + goto err_alloc_nh; + } } for (index = 0; index < edev->max_supported; index++) @@ -1253,6 +1248,12 @@ int extcon_dev_register(struct extcon_dev *edev) dev_set_drvdata(&edev->dev, edev); edev->state = 0; + ret = device_register(&edev->dev); + if (ret) { + put_device(&edev->dev); + goto err_dev; + } + mutex_lock(&extcon_dev_list_lock); list_add(&edev->entry, &extcon_dev_list); mutex_unlock(&extcon_dev_list_lock); @@ -1260,6 +1261,9 @@ int extcon_dev_register(struct extcon_dev *edev) return 0; err_dev: + if (edev->max_supported) + kfree(edev->nh); +err_alloc_nh: if (edev->max_supported) kfree(edev->extcon_dev_type.groups); err_alloc_groups: @@ -1320,6 +1324,7 @@ void extcon_dev_unregister(struct extcon_dev *edev) if (edev->max_supported) { kfree(edev->extcon_dev_type.groups); kfree(edev->cables); + kfree(edev->nh); } put_device(&edev->dev); diff --git a/drivers/firmware/arm_scmi/base.c b/drivers/firmware/arm_scmi/base.c index b70d741f8465..439b2099abeb 100644 --- a/drivers/firmware/arm_scmi/base.c +++ b/drivers/firmware/arm_scmi/base.c @@ -191,7 +191,7 @@ scmi_base_implementation_list_get(const struct scmi_protocol_handle *ph, break; loop_num_ret = le32_to_cpu(*num_ret); - if (tot_num_ret + loop_num_ret > MAX_PROTOCOLS_IMP) { + if (loop_num_ret > MAX_PROTOCOLS_IMP - tot_num_ret) { dev_err(dev, "No. of Protocol > MAX_PROTOCOLS_IMP"); break; } diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index 8b8127fa8955..4a93fb490cb4 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c @@ -603,7 +603,7 @@ static void __init dmi_sysfs_register_handle(const struct dmi_header *dh, "%d-%d", dh->type, entry->instance); if (*ret) { - kfree(entry); + kobject_put(&entry->kobj); return; } diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 53c7e3f8cfde..7dd0ac1a0cfc 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -941,17 +941,17 @@ EXPORT_SYMBOL_GPL(stratix10_svc_allocate_memory); void stratix10_svc_free_memory(struct stratix10_svc_chan *chan, void *kaddr) { struct stratix10_svc_data_mem *pmem; - size_t size = 0; list_for_each_entry(pmem, &svc_data_mem, node) if (pmem->vaddr == kaddr) { - size = pmem->size; - break; + gen_pool_free(chan->ctrl->genpool, + (unsigned long)kaddr, pmem->size); + pmem->vaddr = NULL; + list_del(&pmem->node); + return; } - gen_pool_free(chan->ctrl->genpool, (unsigned long)kaddr, size); - pmem->vaddr = NULL; - list_del(&pmem->node); + list_del(&svc_data_mem); } EXPORT_SYMBOL_GPL(stratix10_svc_free_memory); diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 4275c18a097a..ea2e2618b794 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -646,10 +646,9 @@ static int dwapb_get_clks(struct dwapb_gpio *gpio) gpio->clks[1].id = "db"; err = devm_clk_bulk_get_optional(gpio->dev, DWAPB_NR_CLOCKS, gpio->clks); - if (err) { - dev_err(gpio->dev, "Cannot get APB/Debounce clocks\n"); - return err; - } + if (err) + return dev_err_probe(gpio->dev, err, + "Cannot get APB/Debounce clocks\n"); err = clk_bulk_prepare_enable(DWAPB_NR_CLOCKS, gpio->clks); if (err) { diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index ed7c5fc47f52..2ab34a8e6273 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -700,6 +700,9 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, unsigned long flags; unsigned int on, off; + if (state->polarity != PWM_POLARITY_NORMAL) + return -EINVAL; + val = (unsigned long long) mvpwm->clk_rate * state->duty_cycle; do_div(val, NSEC_PER_SEC); if (val > UINT_MAX) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index e936e1eb1f95..957be5f69406 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -350,6 +350,9 @@ static const struct regmap_config pca953x_i2c_regmap = { .reg_bits = 8, .val_bits = 8, + .use_single_read = true, + .use_single_write = true, + .readable_reg = pca953x_readable_register, .writeable_reg = pca953x_writeable_register, .volatile_reg = pca953x_volatile_register, @@ -893,15 +896,18 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert) { DECLARE_BITMAP(val, MAX_LINE); + u8 regaddr; int ret; - ret = regcache_sync_region(chip->regmap, chip->regs->output, - chip->regs->output + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) goto out; - ret = regcache_sync_region(chip->regmap, chip->regs->direction, - chip->regs->direction + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) goto out; @@ -1107,20 +1113,21 @@ static int pca953x_regcache_sync(struct device *dev) { struct pca953x_chip *chip = dev_get_drvdata(dev); int ret; + u8 regaddr; /* * The ordering between direction and output is important, * sync these registers first and only then sync the rest. */ - ret = regcache_sync_region(chip->regmap, chip->regs->direction, - chip->regs->direction + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret); return ret; } - ret = regcache_sync_region(chip->regmap, chip->regs->output, - chip->regs->output + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret); return ret; @@ -1128,16 +1135,18 @@ static int pca953x_regcache_sync(struct device *dev) #ifdef CONFIG_GPIO_PCA953X_IRQ if (chip->driver_data & PCA_PCAL) { - ret = regcache_sync_region(chip->regmap, PCAL953X_IN_LATCH, - PCAL953X_IN_LATCH + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync INT latch registers: %d\n", ret); return ret; } - ret = regcache_sync_region(chip->regmap, PCAL953X_INT_MASK, - PCAL953X_INT_MASK + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync INT mask registers: %d\n", ret); diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 58776f2d69ff..1ae612c796ee 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -125,9 +125,13 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, { struct vf610_gpio_port *port = gpiochip_get_data(chip); unsigned long mask = BIT(gpio); + u32 val; - if (port->sdata && port->sdata->have_paddr) - vf610_gpio_writel(mask, port->gpio_base + GPIO_PDDR); + if (port->sdata && port->sdata->have_paddr) { + val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); + val |= mask; + vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); + } vf610_gpio_set(chip, gpio, value); diff --git a/drivers/gpio/gpio-vr41xx.c b/drivers/gpio/gpio-vr41xx.c index 98cd715ccc33..8d09b619c166 100644 --- a/drivers/gpio/gpio-vr41xx.c +++ b/drivers/gpio/gpio-vr41xx.c @@ -217,8 +217,6 @@ static int giu_get_irq(unsigned int irq) printk(KERN_ERR "spurious GIU interrupt: %04x(%04x),%04x(%04x)\n", maskl, pendl, maskh, pendh); - atomic_inc(&irq_err_count); - return -EINVAL; } diff --git a/drivers/gpio/gpio-winbond.c b/drivers/gpio/gpio-winbond.c index 7f8f5b02e31d..4b61d975cc0e 100644 --- a/drivers/gpio/gpio-winbond.c +++ b/drivers/gpio/gpio-winbond.c @@ -385,12 +385,13 @@ static int winbond_gpio_get(struct gpio_chip *gc, unsigned int offset) unsigned long *base = gpiochip_get_data(gc); const struct winbond_gpio_info *info; bool val; + int ret; winbond_gpio_get_info(&offset, &info); - val = winbond_sio_enter(*base); - if (val) - return val; + ret = winbond_sio_enter(*base); + if (ret) + return ret; winbond_sio_select_logical(*base, info->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fb6230c62daa..d3a974d10552 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -689,7 +689,8 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) const uint32_t flush_type = 0; bool all_hub = false; - if (adev->family == AMDGPU_FAMILY_AI) + if (adev->family == AMDGPU_FAMILY_AI || + adev->family == AMDGPU_FAMILY_RV) all_hub = true; return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 867fcee6b0d3..ffd8f5601e28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -116,7 +116,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs int ret; if (cs->in.num_chunks == 0) - return 0; + return -EINVAL; chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); if (!chunk_array) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index b313ce4c3e97..30005ed8156f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -625,8 +625,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) void amdgpu_ucode_free_bo(struct amdgpu_device *adev) { - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) - amdgpu_bo_free_kernel(&adev->firmware.fw_buf, + amdgpu_bo_free_kernel(&adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7bb151283f44..55ecc67592eb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -922,6 +922,37 @@ static void amdgpu_check_debugfs_connector_property_change(struct amdgpu_device } } +struct amdgpu_stutter_quirk { + u16 chip_vendor; + u16 chip_device; + u16 subsys_vendor; + u16 subsys_device; + u8 revision; +}; + +static const struct amdgpu_stutter_quirk amdgpu_stutter_quirk_list[] = { + /* https://bugzilla.kernel.org/show_bug.cgi?id=214417 */ + { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, + { 0, 0, 0, 0, 0 }, +}; + +static bool dm_should_disable_stutter(struct pci_dev *pdev) +{ + const struct amdgpu_stutter_quirk *p = amdgpu_stutter_quirk_list; + + while (p && p->chip_device != 0) { + if (pdev->vendor == p->chip_vendor && + pdev->device == p->chip_device && + pdev->subsystem_vendor == p->subsys_vendor && + pdev->subsystem_device == p->subsys_device && + pdev->revision == p->revision) { + return true; + } + ++p; + } + return false; +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1014,6 +1045,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY) adev->dm.dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true; + if (dm_should_disable_stutter(adev->pdev)) + adev->dm.dc->debug.disable_stutter = true; if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER) adev->dm.dc->debug.disable_stutter = true; @@ -2141,7 +2174,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) { - u32 max_cll, min_cll, max, min, q, r; + u32 max_avg, min_cll, max, min, q, r; struct amdgpu_dm_backlight_caps *caps; struct amdgpu_display_manager *dm; struct drm_connector *conn_base; @@ -2164,7 +2197,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) caps = &dm->backlight_caps; caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps; caps->aux_support = false; - max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll; + max_avg = conn_base->hdr_sink_metadata.hdmi_type1.max_fall; min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll; if (caps->ext_caps->bits.oled == 1 /*|| @@ -2192,8 +2225,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) * The results of the above expressions can be verified at * pre_computed_values. */ - q = max_cll >> 5; - r = max_cll % 32; + q = max_avg >> 5; + r = max_avg % 32; max = (1 << q) * pre_computed_values[r]; // min luminance: maxLum * (CV/255)^2 / 100 diff --git a/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c index 4b3faaccecb9..c8a5a5698edd 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/powerplay/kv_dpm.c @@ -1609,19 +1609,7 @@ static int kv_update_samu_dpm(struct amdgpu_device *adev, bool gate) static u8 kv_get_acp_boot_level(struct amdgpu_device *adev) { - u8 i; - struct amdgpu_clock_voltage_dependency_table *table = - &adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; - - for (i = 0; i < table->count; i++) { - if (table->entries[i].clk >= 0) /* XXX */ - break; - } - - if (i >= table->count) - i = table->count - 1; - - return i; + return 0; } static void kv_update_acp_boot_level(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c index a1e7ba5995c5..d6544a6dabc7 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c @@ -7250,17 +7250,15 @@ static int si_parse_power_table(struct amdgpu_device *adev) if (!adev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - for (i = 0; i < state_array->ucNumEntries; i++) { + for (adev->pm.dpm.num_ps = 0, i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; ps = kzalloc(sizeof(struct si_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(adev->pm.dpm.ps); + if (ps == NULL) return -ENOMEM; - } adev->pm.dpm.ps[i].ps_priv = ps; si_parse_pplib_non_clock_info(adev, &adev->pm.dpm.ps[i], non_clock_info, @@ -7282,8 +7280,8 @@ static int si_parse_power_table(struct amdgpu_device *adev) k++; } power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + adev->pm.dpm.num_ps++; } - adev->pm.dpm.num_ps = state_array->ucNumEntries; /* fill in the vce power states */ for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) { diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c index 98e915e325dd..bc3f42e915e9 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c @@ -264,6 +264,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms, formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl, layer->layer_type, &n_formats); + if (!formats) { + kfree(kplane); + return -ENOMEM; + } err = drm_universal_plane_init(&kms->base, plane, get_possible_crtcs(kms, c->pipeline), @@ -274,8 +278,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms, komeda_put_fourcc_list(formats); - if (err) - goto cleanup; + if (err) { + kfree(kplane); + return err; + } drm_plane_helper_add(plane, &komeda_plane_helper_funcs); diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 587d94798f5c..af729094260c 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -483,7 +483,10 @@ static void malidp_crtc_reset(struct drm_crtc *crtc) if (crtc->state) malidp_crtc_destroy_state(crtc, crtc->state); - __drm_atomic_helper_crtc_reset(crtc, &state->base); + if (state) + __drm_atomic_helper_crtc_reset(crtc, &state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); } static int malidp_crtc_enable_vblank(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 338c6a2ad322..3028d42eceb2 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1309,6 +1309,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) return 0; err_unregister_cec: + cec_unregister_adapter(adv7511->cec_adap); i2c_unregister_device(adv7511->i2c_cec); if (adv7511->cec_clk) clk_disable_unprepare(adv7511->cec_clk); diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index aa1bb86293fd..a7bcb429c02b 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1268,6 +1268,25 @@ static int analogix_dp_bridge_attach(struct drm_bridge *bridge, return 0; } +static +struct drm_crtc *analogix_dp_get_old_crtc(struct analogix_dp_device *dp, + struct drm_atomic_state *state) +{ + struct drm_encoder *encoder = dp->encoder; + struct drm_connector *connector; + struct drm_connector_state *conn_state; + + connector = drm_atomic_get_old_connector_for_encoder(state, encoder); + if (!connector) + return NULL; + + conn_state = drm_atomic_get_old_connector_state(state, connector); + if (!conn_state) + return NULL; + + return conn_state->crtc; +} + static struct drm_crtc *analogix_dp_get_new_crtc(struct analogix_dp_device *dp, struct drm_atomic_state *state) @@ -1448,14 +1467,16 @@ analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge, { struct drm_atomic_state *old_state = old_bridge_state->base.state; struct analogix_dp_device *dp = bridge->driver_private; - struct drm_crtc *crtc; + struct drm_crtc *old_crtc, *new_crtc; + struct drm_crtc_state *old_crtc_state = NULL; struct drm_crtc_state *new_crtc_state = NULL; + int ret; - crtc = analogix_dp_get_new_crtc(dp, old_state); - if (!crtc) + new_crtc = analogix_dp_get_new_crtc(dp, old_state); + if (!new_crtc) goto out; - new_crtc_state = drm_atomic_get_new_crtc_state(old_state, crtc); + new_crtc_state = drm_atomic_get_new_crtc_state(old_state, new_crtc); if (!new_crtc_state) goto out; @@ -1464,6 +1485,19 @@ analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge, return; out: + old_crtc = analogix_dp_get_old_crtc(dp, old_state); + if (old_crtc) { + old_crtc_state = drm_atomic_get_old_crtc_state(old_state, + old_crtc); + + /* When moving from PSR to fully disabled, exit PSR first. */ + if (old_crtc_state && old_crtc_state->self_refresh_active) { + ret = analogix_dp_disable_psr(dp); + if (ret) + DRM_ERROR("Failed to disable psr (%d)\n", ret); + } + } + analogix_dp_bridge_disable(bridge); } @@ -1639,8 +1673,19 @@ static ssize_t analogix_dpaux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { struct analogix_dp_device *dp = to_dp(aux); + int ret; - return analogix_dp_transfer(dp, msg); + pm_runtime_get_sync(dp->dev); + + ret = analogix_dp_detect_hpd(dp); + if (ret) + goto out; + + ret = analogix_dp_transfer(dp, msg); +out: + pm_runtime_put(dp->dev); + + return ret; } struct analogix_dp_device * @@ -1705,8 +1750,10 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); dp->reg_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dp->reg_base)) - return ERR_CAST(dp->reg_base); + if (IS_ERR(dp->reg_base)) { + ret = PTR_ERR(dp->reg_base); + goto err_disable_clk; + } dp->force_hpd = of_property_read_bool(dev->of_node, "force-hpd"); @@ -1718,7 +1765,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (IS_ERR(dp->hpd_gpiod)) { dev_err(dev, "error getting HDP GPIO: %ld\n", PTR_ERR(dp->hpd_gpiod)); - return ERR_CAST(dp->hpd_gpiod); + ret = PTR_ERR(dp->hpd_gpiod); + goto err_disable_clk; } if (dp->hpd_gpiod) { @@ -1738,7 +1786,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (dp->irq == -ENXIO) { dev_err(&pdev->dev, "failed to get irq\n"); - return ERR_PTR(-ENODEV); + ret = -ENODEV; + goto err_disable_clk; } ret = devm_request_threaded_irq(&pdev->dev, dp->irq, @@ -1747,11 +1796,15 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) irq_flags, "analogix-dp", dp); if (ret) { dev_err(&pdev->dev, "failed to request irq\n"); - return ERR_PTR(ret); + goto err_disable_clk; } disable_irq(dp->irq); return dp; + +err_disable_clk: + clk_disable_unprepare(dp->clock); + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(analogix_dp_probe); diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 8a871e5c3e26..7fc8e7000046 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -996,9 +996,19 @@ crtc_needs_disable(struct drm_crtc_state *old_state, return drm_atomic_crtc_effectively_active(old_state); /* - * We need to run through the crtc_funcs->disable() function if the CRTC - * is currently on, if it's transitioning to self refresh mode, or if - * it's in self refresh mode and needs to be fully disabled. + * We need to disable bridge(s) and CRTC if we're transitioning out of + * self-refresh and changing CRTCs at the same time, because the + * bridge tracks self-refresh status via CRTC state. + */ + if (old_state->self_refresh_active && + old_state->crtc != new_state->crtc) + return true; + + /* + * We also need to run through the crtc_funcs->disable() function if + * the CRTC is currently on, if it's transitioning to self refresh + * mode, or if it's in self refresh mode and needs to be fully + * disabled. */ return old_state->active || (old_state->self_refresh_active && !new_state->enable) || diff --git a/drivers/gpu/drm/drm_crtc_helper_internal.h b/drivers/gpu/drm/drm_crtc_helper_internal.h index 25ce42e79995..61e09f8a8d0f 100644 --- a/drivers/gpu/drm/drm_crtc_helper_internal.h +++ b/drivers/gpu/drm/drm_crtc_helper_internal.h @@ -32,16 +32,6 @@ #include #include -/* drm_fb_helper.c */ -#ifdef CONFIG_DRM_FBDEV_EMULATION -int drm_fb_helper_modinit(void); -#else -static inline int drm_fb_helper_modinit(void) -{ - return 0; -} -#endif - /* drm_dp_aux_dev.c */ #ifdef CONFIG_DRM_DP_AUX_CHARDEV int drm_dp_aux_dev_init(void); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 1f54e9470165..ab423b0413ee 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -4792,6 +4792,7 @@ static void fetch_monitor_name(struct drm_dp_mst_topology_mgr *mgr, mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port); drm_edid_get_monitor_name(mst_edid, name, namelen); + kfree(mst_edid); } /** diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 862e173d3431..4334e466b4e0 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1995,9 +1995,6 @@ struct edid *drm_do_get_edid(struct drm_connector *connector, connector_bad_edid(connector, edid, edid[0x7e] + 1); - edid[EDID_LENGTH-1] += edid[0x7e] - valid_extensions; - edid[0x7e] = valid_extensions; - new = kmalloc_array(valid_extensions + 1, EDID_LENGTH, GFP_KERNEL); if (!new) @@ -2014,6 +2011,9 @@ struct edid *drm_do_get_edid(struct drm_connector *connector, base += EDID_LENGTH; } + new[EDID_LENGTH - 1] += new[0x7e] - valid_extensions; + new[0x7e] = valid_extensions; + kfree(edid); edid = new; } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 8033467db4be..ac5d61e65124 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2271,24 +2271,3 @@ void drm_fbdev_generic_setup(struct drm_device *dev, drm_client_register(&fb_helper->client); } EXPORT_SYMBOL(drm_fbdev_generic_setup); - -/* The Kconfig DRM_KMS_HELPER selects FRAMEBUFFER_CONSOLE (if !EXPERT) - * but the module doesn't depend on any fb console symbols. At least - * attempt to load fbcon to avoid leaving the system without a usable console. - */ -int __init drm_fb_helper_modinit(void) -{ -#if defined(CONFIG_FRAMEBUFFER_CONSOLE_MODULE) && !defined(CONFIG_EXPERT) - const char name[] = "fbcon"; - struct module *fbcon; - - mutex_lock(&module_mutex); - fbcon = find_module(name); - mutex_unlock(&module_mutex); - - if (!fbcon) - request_module_nowait(name); -#endif - return 0; -} -EXPORT_SYMBOL(drm_fb_helper_modinit); diff --git a/drivers/gpu/drm/drm_kms_helper_common.c b/drivers/gpu/drm/drm_kms_helper_common.c index 221a8528c993..f933da1656eb 100644 --- a/drivers/gpu/drm/drm_kms_helper_common.c +++ b/drivers/gpu/drm/drm_kms_helper_common.c @@ -64,19 +64,18 @@ MODULE_PARM_DESC(edid_firmware, static int __init drm_kms_helper_init(void) { - int ret; + /* + * The Kconfig DRM_KMS_HELPER selects FRAMEBUFFER_CONSOLE (if !EXPERT) + * but the module doesn't depend on any fb console symbols. At least + * attempt to load fbcon to avoid leaving the system without a usable + * console. + */ + if (IS_ENABLED(CONFIG_DRM_FBDEV_EMULATION) && + IS_MODULE(CONFIG_FRAMEBUFFER_CONSOLE) && + !IS_ENABLED(CONFIG_EXPERT)) + request_module_nowait("fbcon"); - /* Call init functions from specific kms helpers here */ - ret = drm_fb_helper_modinit(); - if (ret < 0) - goto out; - - ret = drm_dp_aux_dev_init(); - if (ret < 0) - goto out; - -out: - return ret; + return drm_dp_aux_dev_init(); } static void __exit drm_kms_helper_exit(void) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index affe1cfed009..24f643982903 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -186,6 +186,13 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, if (WARN_ON(config->num_total_plane >= 32)) return -EINVAL; + /* + * First driver to need more than 64 formats needs to fix this. Each + * format is encoded as a bit and the current code only supports a u64. + */ + if (WARN_ON(format_count > 64)) + return -EINVAL; + WARN_ON(drm_drv_uses_atomic_modeset(dev) && (!funcs->atomic_destroy_state || !funcs->atomic_duplicate_state)); @@ -207,13 +214,6 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, return -ENOMEM; } - /* - * First driver to need more than 64 formats needs to fix this. Each - * format is encoded as a bit and the current code only supports a u64. - */ - if (WARN_ON(format_count > 64)) - return -EINVAL; - if (format_modifiers) { const uint64_t *temp_modifiers = format_modifiers; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 984569a59a90..9ba2fe48228f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -282,6 +282,12 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context, mutex_lock(&context->lock); + /* Bail if the mapping has been reaped by another thread */ + if (!mapping->context) { + mutex_unlock(&context->lock); + return; + } + /* If the vram node is on the mm, unmap and remove the node */ if (mapping->vram_node.mm == &context->mm) etnaviv_iommu_remove_mapping(context, mapping); diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c index 531c5485be17..bd3df5adb96f 100644 --- a/drivers/gpu/drm/gma500/psb_intel_display.c +++ b/drivers/gpu/drm/gma500/psb_intel_display.c @@ -536,14 +536,15 @@ void psb_intel_crtc_init(struct drm_device *dev, int pipe, struct drm_crtc *psb_intel_get_crtc_from_pipe(struct drm_device *dev, int pipe) { - struct drm_crtc *crtc = NULL; + struct drm_crtc *crtc; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct gma_crtc *gma_crtc = to_gma_crtc(crtc); + if (gma_crtc->pipe == pipe) - break; + return crtc; } - return crtc; + return NULL; } int gma_connector_clones(struct drm_device *dev, int type_mask) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index ecaa538b2d35..ef7878193491 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -790,6 +790,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); if (ret) { + drm_dp_mst_put_port_malloc(port); intel_connector_free(intel_connector); return NULL; } diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index eed037ec0b29..8777d35ac7a7 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -121,9 +121,25 @@ struct i2c_adapter_lookup { #define ICL_GPIO_DDPA_CTRLCLK_2 8 #define ICL_GPIO_DDPA_CTRLDATA_2 9 -static enum port intel_dsi_seq_port_to_port(u8 port) +static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi, + u8 seq_port) { - return port ? PORT_C : PORT_A; + /* + * If single link DSI is being used on any port, the VBT sequence block + * send packet apparently always has 0 for the port. Just use the port + * we have configured, and ignore the sequence block port. + */ + if (hweight8(intel_dsi->ports) == 1) + return ffs(intel_dsi->ports) - 1; + + if (seq_port) { + if (intel_dsi->ports & PORT_B) + return PORT_B; + else if (intel_dsi->ports & PORT_C) + return PORT_C; + } + + return PORT_A; } static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, @@ -145,15 +161,10 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, seq_port = (flags >> MIPI_PORT_SHIFT) & 3; - /* For DSI single link on Port A & C, the seq_port value which is - * parsed from Sequence Block#53 of VBT has been set to 0 - * Now, read/write of packets for the DSI single link on Port A and - * Port C will based on the DVO port from VBT block 2. - */ - if (intel_dsi->ports == (1 << PORT_C)) - port = PORT_C; - else - port = intel_dsi_seq_port_to_port(seq_port); + port = intel_dsi_seq_port_to_port(intel_dsi, seq_port); + + if (drm_WARN_ON(&dev_priv->drm, !intel_dsi->dsi_hosts[port])) + goto out; dsi_device = intel_dsi->dsi_hosts[port]->device; if (!dsi_device) { diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 6d083b98f6ae..abff2d6cedd1 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -376,21 +376,6 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, return -EINVAL; } - /* - * The port numbering and mapping here is bizarre. The now-obsolete - * swsci spec supports ports numbered [0..4]. Port E is handled as a - * special case, but port F and beyond are not. The functionality is - * supposed to be obsolete for new platforms. Just bail out if the port - * number is out of bounds after mapping. - */ - if (port > 4) { - drm_dbg_kms(&dev_priv->drm, - "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", - intel_encoder->base.base.id, intel_encoder->base.name, - port_name(intel_encoder->port), port); - return -EINVAL; - } - if (!enable) parm |= 4 << 8; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 6615eb5147e2..a33887f2464f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -736,6 +736,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + for_each_engine(engine, gt, id) { /* * HW architecture suggest typical invalidation time at 40us, @@ -750,7 +764,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 95d41c01d0e0..35d55f98a06f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -4788,8 +4788,8 @@ static int live_lrc_layout(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); @@ -4965,8 +4965,8 @@ static int live_lrc_fixed(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 74e66dea5708..5e670aace59f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3964,8 +3964,8 @@ static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf, return ERR_PTR(err); } -static ssize_t show_dynamic_id(struct device *dev, - struct device_attribute *attr, +static ssize_t show_dynamic_id(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { struct i915_oa_config *oa_config = diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index a36a455ae336..534951ff38bb 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -54,7 +54,7 @@ struct i915_oa_config { struct attribute_group sysfs_metric; struct attribute *attrs[2]; - struct device_attribute sysfs_metric_id; + struct kobj_attribute sysfs_metric_id; struct kref ref; struct rcu_head rcu; diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 45d32ef42787..ac40a95374d3 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -500,7 +500,14 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct i915_gpu_coredump *gpu; - ssize_t ret; + ssize_t ret = 0; + + /* + * FIXME: Concurrent clients triggering resets and reading + clearing + * dumps can cause inconsistent sysfs reads when a user calls in with a + * non-zero offset to complete a prior partial read but the + * gpu_coredump has been cleared or replaced. + */ gpu = i915_first_error_state(i915); if (IS_ERR(gpu)) { @@ -512,8 +519,10 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, const char *str = "No error state collected\n"; size_t len = strlen(str); - ret = min_t(size_t, count, len - off); - memcpy(buf, str + off, ret); + if (off < len) { + ret = min_t(size_t, count, len - off); + memcpy(buf, str + off, ret); + } } return ret; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 472aaea75ef8..2f2dc029668b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2846,7 +2846,7 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, } static void intel_read_wm_latency(struct drm_i915_private *dev_priv, - u16 wm[8]) + u16 wm[]) { struct intel_uncore *uncore = &dev_priv->uncore; diff --git a/drivers/gpu/drm/imx/dcss/dcss-dev.c b/drivers/gpu/drm/imx/dcss/dcss-dev.c index c849533ca83e..3f5750cc2673 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-dev.c +++ b/drivers/gpu/drm/imx/dcss/dcss-dev.c @@ -207,6 +207,7 @@ struct dcss_dev *dcss_dev_create(struct device *dev, bool hdmi_output) ret = dcss_submodules_init(dcss); if (ret) { + of_node_put(dcss->of_port); dev_err(dev, "submodules initialization failed\n"); goto clks_err; } @@ -237,6 +238,8 @@ void dcss_dev_destroy(struct dcss_dev *dcss) dcss_clocks_disable(dcss); } + of_node_put(dcss->of_port); + pm_runtime_disable(dcss->dev); dcss_submodules_stop(dcss); diff --git a/drivers/gpu/drm/imx/dcss/dcss-plane.c b/drivers/gpu/drm/imx/dcss/dcss-plane.c index f54087ac44d3..46a188dd02ad 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-plane.c +++ b/drivers/gpu/drm/imx/dcss/dcss-plane.c @@ -268,7 +268,6 @@ static void dcss_plane_atomic_update(struct drm_plane *plane, struct dcss_plane *dcss_plane = to_dcss_plane(plane); struct dcss_dev *dcss = plane->dev->dev_private; struct drm_framebuffer *fb = state->fb; - u32 pixel_format; struct drm_crtc_state *crtc_state; bool modifiers_present; u32 src_w, src_h, dst_w, dst_h; @@ -279,7 +278,6 @@ static void dcss_plane_atomic_update(struct drm_plane *plane, if (!fb || !state->crtc || !state->visible) return; - pixel_format = state->fb->format->format; crtc_state = state->crtc->state; modifiers_present = !!(fb->flags & DRM_MODE_FB_MODIFIERS); diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index d412fc265395..fd9d8e51837f 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -68,7 +68,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc, drm_atomic_crtc_state_for_each_plane(plane, old_crtc_state) { if (plane == &ipu_crtc->plane[0]->base) disable_full = true; - if (&ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base) + if (ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base) disable_partial = true; } diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index b6bb5fc7d183..e34718cf5c2e 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -68,6 +69,21 @@ struct ingenic_drm { bool panel_is_sharp; bool no_vblank; + + /* + * clk_mutex is used to synchronize the pixel clock rate update with + * the VBLANK. When the pixel clock's parent clock needs to be updated, + * clock_nb's notifier function will lock the mutex, then wait until the + * next VBLANK. At that point, the parent clock's rate can be updated, + * and the mutex is then unlocked. If an atomic commit happens in the + * meantime, it will lock on the mutex, effectively waiting until the + * clock update process finishes. Finally, the pixel clock's rate will + * be recomputed when the mutex has been released, in the pending atomic + * commit, or a future one. + */ + struct mutex clk_mutex; + bool update_clk_rate; + struct notifier_block clock_nb; }; static const u32 ingenic_drm_primary_formats[] = { @@ -111,6 +127,29 @@ static inline struct ingenic_drm *drm_crtc_get_priv(struct drm_crtc *crtc) return container_of(crtc, struct ingenic_drm, crtc); } +static inline struct ingenic_drm *drm_nb_get_priv(struct notifier_block *nb) +{ + return container_of(nb, struct ingenic_drm, clock_nb); +} + +static int ingenic_drm_update_pixclk(struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct ingenic_drm *priv = drm_nb_get_priv(nb); + + switch (action) { + case PRE_RATE_CHANGE: + mutex_lock(&priv->clk_mutex); + priv->update_clk_rate = true; + drm_crtc_wait_one_vblank(&priv->crtc); + return NOTIFY_OK; + default: + mutex_unlock(&priv->clk_mutex); + return NOTIFY_OK; + } +} + static void ingenic_drm_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *state) { @@ -276,8 +315,14 @@ static void ingenic_drm_crtc_atomic_flush(struct drm_crtc *crtc, if (drm_atomic_crtc_needs_modeset(state)) { ingenic_drm_crtc_update_timings(priv, &state->mode); + priv->update_clk_rate = true; + } + if (priv->update_clk_rate) { + mutex_lock(&priv->clk_mutex); clk_set_rate(priv->pix_clk, state->adjusted_mode.clock * 1000); + priv->update_clk_rate = false; + mutex_unlock(&priv->clk_mutex); } if (event) { @@ -936,16 +981,28 @@ static int ingenic_drm_bind(struct device *dev, bool has_components) if (soc_info->has_osd) regmap_write(priv->map, JZ_REG_LCD_OSDC, JZ_LCD_OSDC_OSDEN); + mutex_init(&priv->clk_mutex); + priv->clock_nb.notifier_call = ingenic_drm_update_pixclk; + + parent_clk = clk_get_parent(priv->pix_clk); + ret = clk_notifier_register(parent_clk, &priv->clock_nb); + if (ret) { + dev_err(dev, "Unable to register clock notifier\n"); + goto err_devclk_disable; + } + ret = drm_dev_register(drm, 0); if (ret) { dev_err(dev, "Failed to register DRM driver\n"); - goto err_devclk_disable; + goto err_clk_notifier_unregister; } drm_fbdev_generic_setup(drm, 32); return 0; +err_clk_notifier_unregister: + clk_notifier_unregister(parent_clk, &priv->clock_nb); err_devclk_disable: if (priv->lcd_clk) clk_disable_unprepare(priv->lcd_clk); @@ -967,7 +1024,9 @@ static int compare_of(struct device *dev, void *data) static void ingenic_drm_unbind(struct device *dev) { struct ingenic_drm *priv = dev_get_drvdata(dev); + struct clk *parent_clk = clk_get_parent(priv->pix_clk); + clk_notifier_unregister(parent_clk, &priv->clock_nb); if (priv->lcd_clk) clk_disable_unprepare(priv->lcd_clk); clk_disable_unprepare(priv->pix_clk); diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c index cb29b649fcdb..12bf93769497 100644 --- a/drivers/gpu/drm/mediatek/mtk_cec.c +++ b/drivers/gpu/drm/mediatek/mtk_cec.c @@ -84,7 +84,7 @@ static void mtk_cec_mask(struct mtk_cec *cec, unsigned int offset, u32 tmp = readl(cec->regs + offset) & ~mask; tmp |= val & mask; - writel(val, cec->regs + offset); + writel(tmp, cec->regs + offset); } void mtk_cec_set_hpd_event(struct device *dev, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 39563daff4a0..dffc133b8b1c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1308,6 +1308,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) BUG_ON(!node); ret = a6xx_gmu_init(a6xx_gpu, node); + of_node_put(node); if (ret) { a6xx_destroy(&(a6xx_gpu->base.base)); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 458b5b26d3c2..de8cc25506d6 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -960,7 +960,8 @@ void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) release_firmware(adreno_gpu->fw[i]); - pm_runtime_disable(&priv->gpu_pdev->dev); + if (pm_runtime_enabled(&priv->gpu_pdev->dev)) + pm_runtime_disable(&priv->gpu_pdev->dev); msm_gpu_cleanup(&adreno_gpu->base); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c index 6f0f54588124..108882bbd2b8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c @@ -146,6 +146,7 @@ static void dpu_hw_intf_setup_timing_engine(struct dpu_hw_intf *ctx, active_v_end = active_v_start + (p->yres * hsync_period) - 1; display_v_start += p->hsync_pulse_width + p->h_back_porch; + display_v_end -= p->h_front_porch; active_hctl = (active_h_end << 16) | active_h_start; display_hctl = active_hctl; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 08e082d0443a..7503f093f3b6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -678,8 +678,10 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) for (i = 0; i < dpu_kms->catalog->vbif_count; i++) { u32 vbif_idx = dpu_kms->catalog->vbif[i].id; - if ((vbif_idx < VBIF_MAX) && dpu_kms->hw_vbif[vbif_idx]) + if ((vbif_idx < VBIF_MAX) && dpu_kms->hw_vbif[vbif_idx]) { dpu_hw_vbif_destroy(dpu_kms->hw_vbif[vbif_idx]); + dpu_kms->hw_vbif[vbif_idx] = NULL; + } } } @@ -937,7 +939,9 @@ static int dpu_kms_hw_init(struct msm_kms *kms) dpu_kms_parse_data_bus_icc_path(dpu_kms); - pm_runtime_get_sync(&dpu_kms->pdev->dev); + rc = pm_runtime_resume_and_get(&dpu_kms->pdev->dev); + if (rc < 0) + goto error; dpu_kms->core_rev = readl_relaxed(dpu_kms->mmio + 0x0); diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 913de5938782..b4d0bfc83d70 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -221,6 +221,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, encoder = mdp4_lcdc_encoder_init(dev, panel_node); if (IS_ERR(encoder)) { DRM_DEV_ERROR(dev->dev, "failed to construct LCDC encoder\n"); + of_node_put(panel_node); return PTR_ERR(encoder); } @@ -230,6 +231,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, connector = mdp4_lvds_connector_init(dev, panel_node, encoder); if (IS_ERR(connector)) { DRM_DEV_ERROR(dev->dev, "failed to initialize LVDS connector\n"); + of_node_put(panel_node); return PTR_ERR(connector); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index a8fa084dfa49..ff4f207cbdea 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -608,9 +608,15 @@ int mdp5_crtc_setup_pipeline(struct drm_crtc *crtc, if (ret) return ret; - mdp5_mixer_release(new_crtc_state->state, old_mixer); + ret = mdp5_mixer_release(new_crtc_state->state, old_mixer); + if (ret) + return ret; + if (old_r_mixer) { - mdp5_mixer_release(new_crtc_state->state, old_r_mixer); + ret = mdp5_mixer_release(new_crtc_state->state, old_r_mixer); + if (ret) + return ret; + if (!need_right_mixer) pipeline->r_mixer = NULL; } @@ -977,8 +983,10 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, ret = msm_gem_get_and_pin_iova(cursor_bo, kms->aspace, &mdp5_crtc->cursor.iova); - if (ret) + if (ret) { + drm_gem_object_put(cursor_bo); return -EINVAL; + } pm_runtime_get_sync(&pdev->dev); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index e193865ce9a2..9baaaef706ab 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -598,9 +598,9 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) pdev = mdp5_kms->pdev; irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (irq < 0) { - ret = irq; - DRM_DEV_ERROR(&pdev->dev, "failed to get irq: %d\n", ret); + if (!irq) { + ret = -EINVAL; + DRM_DEV_ERROR(&pdev->dev, "failed to get irq\n"); goto fail; } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c index 954db683ae44..2536def2a000 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c @@ -116,21 +116,28 @@ int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc, return 0; } -void mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer) +int mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer) { struct mdp5_global_state *global_state = mdp5_get_global_state(s); - struct mdp5_hw_mixer_state *new_state = &global_state->hwmixer; + struct mdp5_hw_mixer_state *new_state; if (!mixer) - return; + return 0; + + if (IS_ERR(global_state)) + return PTR_ERR(global_state); + + new_state = &global_state->hwmixer; if (WARN_ON(!new_state->hwmixer_to_crtc[mixer->idx])) - return; + return -EINVAL; DBG("%s: release from crtc %s", mixer->name, new_state->hwmixer_to_crtc[mixer->idx]->name); new_state->hwmixer_to_crtc[mixer->idx] = NULL; + + return 0; } void mdp5_mixer_destroy(struct mdp5_hw_mixer *mixer) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h index 43c9ba43ce18..545ee223b9d7 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h @@ -30,7 +30,7 @@ void mdp5_mixer_destroy(struct mdp5_hw_mixer *lm); int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc, uint32_t caps, struct mdp5_hw_mixer **mixer, struct mdp5_hw_mixer **r_mixer); -void mdp5_mixer_release(struct drm_atomic_state *s, - struct mdp5_hw_mixer *mixer); +int mdp5_mixer_release(struct drm_atomic_state *s, + struct mdp5_hw_mixer *mixer); #endif /* __MDP5_LM_H__ */ diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c index ba6695963aa6..a4f5cb90f3e8 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c @@ -119,18 +119,23 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, return 0; } -void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) +int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); struct mdp5_global_state *state = mdp5_get_global_state(s); - struct mdp5_hw_pipe_state *new_state = &state->hwpipe; + struct mdp5_hw_pipe_state *new_state; if (!hwpipe) - return; + return 0; + + if (IS_ERR(state)) + return PTR_ERR(state); + + new_state = &state->hwpipe; if (WARN_ON(!new_state->hwpipe_to_plane[hwpipe->idx])) - return; + return -EINVAL; DBG("%s: release from plane %s", hwpipe->name, new_state->hwpipe_to_plane[hwpipe->idx]->name); @@ -141,6 +146,8 @@ void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) } new_state->hwpipe_to_plane[hwpipe->idx] = NULL; + + return 0; } void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h index 9b26d0761bd4..cca67938cab2 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h @@ -37,7 +37,7 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, uint32_t caps, uint32_t blkcfg, struct mdp5_hw_pipe **hwpipe, struct mdp5_hw_pipe **r_hwpipe); -void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe); +int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe); struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe, uint32_t reg_offset, uint32_t caps); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c index da0799333970..0dc23c86747e 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c @@ -393,12 +393,24 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, mdp5_state->r_hwpipe = NULL; - mdp5_pipe_release(state->state, old_hwpipe); - mdp5_pipe_release(state->state, old_right_hwpipe); + ret = mdp5_pipe_release(state->state, old_hwpipe); + if (ret) + return ret; + + ret = mdp5_pipe_release(state->state, old_right_hwpipe); + if (ret) + return ret; + } } else { - mdp5_pipe_release(state->state, mdp5_state->hwpipe); - mdp5_pipe_release(state->state, mdp5_state->r_hwpipe); + ret = mdp5_pipe_release(state->state, mdp5_state->hwpipe); + if (ret) + return ret; + + ret = mdp5_pipe_release(state->state, mdp5_state->r_hwpipe); + if (ret) + return ret; + mdp5_state->hwpipe = mdp5_state->r_hwpipe = NULL; } diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index aeca8b2ac5c6..2da6982efdbf 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -572,7 +572,7 @@ void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog) dp_write_aux(catalog, REG_DP_DP_HPD_CTRL, DP_DP_HPD_CTRL_HPD_EN); } -u32 dp_catalog_hpd_get_state_status(struct dp_catalog *dp_catalog) +u32 dp_catalog_link_is_connected(struct dp_catalog *dp_catalog) { struct dp_catalog_private *catalog = container_of(dp_catalog, struct dp_catalog_private, dp_catalog); diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.h b/drivers/gpu/drm/msm/dp/dp_catalog.h index 6d257dbebf29..176a9020a520 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.h +++ b/drivers/gpu/drm/msm/dp/dp_catalog.h @@ -97,7 +97,7 @@ void dp_catalog_ctrl_enable_irq(struct dp_catalog *dp_catalog, bool enable); void dp_catalog_hpd_config_intr(struct dp_catalog *dp_catalog, u32 intr_mask, bool en); void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog); -u32 dp_catalog_hpd_get_state_status(struct dp_catalog *dp_catalog); +u32 dp_catalog_link_is_connected(struct dp_catalog *dp_catalog); u32 dp_catalog_hpd_get_intr_status(struct dp_catalog *dp_catalog); void dp_catalog_ctrl_phy_reset(struct dp_catalog *dp_catalog); int dp_catalog_ctrl_update_vx_px(struct dp_catalog *dp_catalog, u8 v_level, diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index c83a1650437d..b9ca844ce2ad 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1460,6 +1460,30 @@ static int dp_ctrl_reinitialize_mainlink(struct dp_ctrl_private *ctrl) return ret; } +static int dp_ctrl_deinitialize_mainlink(struct dp_ctrl_private *ctrl) +{ + struct dp_io *dp_io; + struct phy *phy; + int ret; + + dp_io = &ctrl->parser->io; + phy = dp_io->phy; + + dp_catalog_ctrl_mainlink_ctrl(ctrl->catalog, false); + + dp_catalog_ctrl_reset(ctrl->catalog); + + ret = dp_power_clk_enable(ctrl->power, DP_CTRL_PM, false); + if (ret) { + DRM_ERROR("Failed to disable link clocks. ret=%d\n", ret); + } + + phy_power_off(phy); + phy_exit(phy); + + return 0; +} + static int dp_ctrl_link_maintenance(struct dp_ctrl_private *ctrl) { int ret = 0; @@ -1640,8 +1664,7 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) if (rc) return rc; - while (--link_train_max_retries && - !atomic_read(&ctrl->dp_ctrl.aborted)) { + while (--link_train_max_retries) { rc = dp_ctrl_reinitialize_mainlink(ctrl); if (rc) { DRM_ERROR("Failed to reinitialize mainlink. rc=%d\n", @@ -1656,6 +1679,10 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) break; } else if (training_step == DP_TRAINING_1) { /* link train_1 failed */ + if (!dp_catalog_link_is_connected(ctrl->catalog)) { + break; + } + rc = dp_ctrl_link_rate_down_shift(ctrl); if (rc < 0) { /* already in RBR = 1.6G */ if (cr.lane_0_1 & DP_LANE0_1_CR_DONE) { @@ -1675,6 +1702,10 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) } } else if (training_step == DP_TRAINING_2) { /* link train_2 failed, lower lane rate */ + if (!dp_catalog_link_is_connected(ctrl->catalog)) { + break; + } + rc = dp_ctrl_link_lane_down_shift(ctrl); if (rc < 0) { /* end with failure */ @@ -1695,6 +1726,11 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) */ if (rc == 0) /* link train successfully */ dp_ctrl_push_idle(dp_ctrl); + else { + /* link training failed */ + dp_ctrl_deinitialize_mainlink(ctrl); + rc = -ECONNRESET; + } return rc; } diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 6cd6934c8c9f..a3de1d0523ea 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -45,7 +45,7 @@ enum { ST_CONNECT_PENDING, ST_CONNECTED, ST_DISCONNECT_PENDING, - ST_SUSPEND_PENDING, + ST_DISPLAY_OFF, ST_SUSPENDED, }; @@ -102,6 +102,8 @@ struct dp_display_private { struct dp_display_mode dp_mode; struct msm_dp dp_display; + bool encoder_mode_set; + /* wait for audio signaling */ struct completion audio_comp; @@ -111,6 +113,7 @@ struct dp_display_private { u32 hpd_state; u32 event_pndx; u32 event_gndx; + struct task_struct *ev_tsk; struct dp_event event_list[DP_EVENT_Q_MAX]; spinlock_t event_lock; @@ -194,6 +197,8 @@ void dp_display_signal_audio_complete(struct msm_dp *dp_display) complete_all(&dp->audio_comp); } +static int dp_hpd_event_thread_start(struct dp_display_private *dp_priv); + static int dp_display_bind(struct device *dev, struct device *master, void *data) { @@ -234,9 +239,18 @@ static int dp_display_bind(struct device *dev, struct device *master, } rc = dp_register_audio_driver(dev, dp->audio); - if (rc) + if (rc) { DRM_ERROR("Audio registration Dp failed\n"); + goto end; + } + rc = dp_hpd_event_thread_start(dp); + if (rc) { + DRM_ERROR("Event thread create failed\n"); + goto end; + } + + return 0; end: return rc; } @@ -255,6 +269,12 @@ static void dp_display_unbind(struct device *dev, struct device *master, return; } + /* disable all HPD interrupts */ + if (dp->core_initialized) + dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); + + kthread_stop(dp->ev_tsk); + dp_power_client_deinit(dp->power); dp_aux_unregister(dp->aux); priv->dp = NULL; @@ -288,13 +308,24 @@ static void dp_display_send_hpd_event(struct msm_dp *dp_display) drm_helper_hpd_irq_event(connector->dev); } -static int dp_display_send_hpd_notification(struct dp_display_private *dp, - bool hpd) + +static void dp_display_set_encoder_mode(struct dp_display_private *dp) { - static bool encoder_mode_set; struct msm_drm_private *priv = dp->dp_display.drm_dev->dev_private; struct msm_kms *kms = priv->kms; + if (!dp->encoder_mode_set && dp->dp_display.encoder && + kms->funcs->set_encoder_mode) { + kms->funcs->set_encoder_mode(kms, + dp->dp_display.encoder, false); + + dp->encoder_mode_set = true; + } +} + +static int dp_display_send_hpd_notification(struct dp_display_private *dp, + bool hpd) +{ if ((hpd && dp->dp_display.is_connected) || (!hpd && !dp->dp_display.is_connected)) { DRM_DEBUG_DP("HPD already %s\n", (hpd ? "on" : "off")); @@ -307,15 +338,6 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, dp->dp_display.is_connected = hpd; - if (dp->dp_display.is_connected && dp->dp_display.encoder - && !encoder_mode_set - && kms->funcs->set_encoder_mode) { - kms->funcs->set_encoder_mode(kms, - dp->dp_display.encoder, false); - DRM_DEBUG_DP("set_encoder_mode() Completed\n"); - encoder_mode_set = true; - } - dp_display_send_hpd_event(&dp->dp_display); return 0; @@ -351,7 +373,6 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp) dp_add_event(dp, EV_USER_NOTIFICATION, true, 0); - end: return rc; } @@ -368,6 +389,8 @@ static void dp_display_host_init(struct dp_display_private *dp) if (dp->usbpd->orientation == ORIENTATION_CC2) flip = true; + dp_display_set_encoder_mode(dp); + dp_power_init(dp->power, flip); dp_ctrl_host_init(dp->ctrl, flip); dp_aux_init(dp->aux); @@ -451,25 +474,42 @@ static void dp_display_handle_video_request(struct dp_display_private *dp) } } +static int dp_display_handle_port_ststus_changed(struct dp_display_private *dp) +{ + int rc = 0; + + if (dp_display_is_sink_count_zero(dp)) { + DRM_DEBUG_DP("sink count is zero, nothing to do\n"); + if (dp->hpd_state != ST_DISCONNECTED) { + dp->hpd_state = ST_DISCONNECT_PENDING; + dp_add_event(dp, EV_USER_NOTIFICATION, false, 0); + } + } else { + if (dp->hpd_state == ST_DISCONNECTED) { + dp->hpd_state = ST_CONNECT_PENDING; + rc = dp_display_process_hpd_high(dp); + if (rc) + dp->hpd_state = ST_DISCONNECTED; + } + } + + return rc; +} + static int dp_display_handle_irq_hpd(struct dp_display_private *dp) { - u32 sink_request; + u32 sink_request = dp->link->sink_request; - sink_request = dp->link->sink_request; - - if (sink_request & DS_PORT_STATUS_CHANGED) { - dp_add_event(dp, EV_USER_NOTIFICATION, false, 0); - if (dp_display_is_sink_count_zero(dp)) { - DRM_DEBUG_DP("sink count is zero, nothing to do\n"); - return 0; + if (dp->hpd_state == ST_DISCONNECTED) { + if (sink_request & DP_LINK_STATUS_UPDATED) { + DRM_ERROR("Disconnected, no DP_LINK_STATUS_UPDATED\n"); + return -EINVAL; } - - return dp_display_process_hpd_high(dp); } dp_ctrl_handle_sink_request(dp->ctrl); - if (dp->link->sink_request & DP_TEST_LINK_VIDEO_PATTERN) + if (sink_request & DP_TEST_LINK_VIDEO_PATTERN) dp_display_handle_video_request(dp); return 0; @@ -478,7 +518,9 @@ static int dp_display_handle_irq_hpd(struct dp_display_private *dp) static int dp_display_usbpd_attention_cb(struct device *dev) { int rc = 0; + u32 sink_request; struct dp_display_private *dp; + struct dp_usbpd *hpd; if (!dev) { DRM_ERROR("invalid dev\n"); @@ -492,10 +534,17 @@ static int dp_display_usbpd_attention_cb(struct device *dev) return -ENODEV; } + hpd = dp->usbpd; + /* check for any test request issued by sink */ rc = dp_link_process_request(dp->link); - if (!rc) - dp_display_handle_irq_hpd(dp); + if (!rc) { + sink_request = dp->link->sink_request; + if (sink_request & DS_PORT_STATUS_CHANGED) + rc = dp_display_handle_port_ststus_changed(dp); + else + rc = dp_display_handle_irq_hpd(dp); + } return rc; } @@ -513,7 +562,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); state = dp->hpd_state; - if (state == ST_SUSPEND_PENDING) { + if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) { mutex_unlock(&dp->event_mutex); return 0; } @@ -535,13 +584,18 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) hpd->hpd_high = 1; ret = dp_display_usbpd_configure_cb(&dp->pdev->dev); - if (ret) { /* failed */ + if (ret) { /* link train failed */ hpd->hpd_high = 0; dp->hpd_state = ST_DISCONNECTED; - } - /* start sanity checking */ - dp_add_event(dp, EV_CONNECT_PENDING_TIMEOUT, 0, tout); + if (ret == -ECONNRESET) { /* cable unplugged */ + dp->core_initialized = false; + } + + } else { + /* start sentinel checking in case of missing uevent */ + dp_add_event(dp, EV_CONNECT_PENDING_TIMEOUT, 0, tout); + } mutex_unlock(&dp->event_mutex); @@ -594,11 +648,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); state = dp->hpd_state; - if (state == ST_SUSPEND_PENDING) { - mutex_unlock(&dp->event_mutex); - return 0; - } - if (state == ST_DISCONNECT_PENDING || state == ST_DISCONNECTED) { mutex_unlock(&dp->event_mutex); return 0; @@ -625,7 +674,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) */ dp_display_usbpd_disconnect_cb(&dp->pdev->dev); - /* start sanity checking */ + /* start sentinel checking in case of missing uevent */ dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND); /* signal the disconnect event early to ensure proper teardown */ @@ -659,17 +708,21 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) { u32 state; + int ret; mutex_lock(&dp->event_mutex); /* irq_hpd can happen at either connected or disconnected state */ state = dp->hpd_state; - if (state == ST_SUSPEND_PENDING) { + if (state == ST_DISPLAY_OFF) { mutex_unlock(&dp->event_mutex); return 0; } - dp_display_usbpd_attention_cb(&dp->pdev->dev); + ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); + if (ret == -ECONNRESET) { /* cable unplugged */ + dp->core_initialized = false; + } mutex_unlock(&dp->event_mutex); @@ -814,6 +867,11 @@ static int dp_display_enable(struct dp_display_private *dp, u32 data) dp_display = g_dp_display; + if (dp_display->power_on) { + DRM_DEBUG_DP("Link already setup, return\n"); + return 0; + } + rc = dp_ctrl_on_stream(dp->ctrl); if (!rc) dp_display->power_on = true; @@ -846,6 +904,9 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) dp_display = g_dp_display; + if (!dp_display->power_on) + return 0; + /* wait only if audio was enabled */ if (dp_display->audio_enabled) { /* signal the disconnect event */ @@ -984,12 +1045,17 @@ static int hpd_event_thread(void *data) while (1) { if (timeout_mode) { wait_event_timeout(dp_priv->event_q, - (dp_priv->event_pndx == dp_priv->event_gndx), - EVENT_TIMEOUT); + (dp_priv->event_pndx == dp_priv->event_gndx) || + kthread_should_stop(), EVENT_TIMEOUT); } else { wait_event_interruptible(dp_priv->event_q, - (dp_priv->event_pndx != dp_priv->event_gndx)); + (dp_priv->event_pndx != dp_priv->event_gndx) || + kthread_should_stop()); } + + if (kthread_should_stop()) + break; + spin_lock_irqsave(&dp_priv->event_lock, flag); todo = &dp_priv->event_list[dp_priv->event_gndx]; if (todo->delay) { @@ -1062,12 +1128,17 @@ static int hpd_event_thread(void *data) return 0; } -static void dp_hpd_event_setup(struct dp_display_private *dp_priv) +static int dp_hpd_event_thread_start(struct dp_display_private *dp_priv) { - init_waitqueue_head(&dp_priv->event_q); - spin_lock_init(&dp_priv->event_lock); + /* set event q to empty */ + dp_priv->event_gndx = 0; + dp_priv->event_pndx = 0; - kthread_run(hpd_event_thread, dp_priv, "dp_hpd_handler"); + dp_priv->ev_tsk = kthread_run(hpd_event_thread, dp_priv, "dp_hpd_handler"); + if (IS_ERR(dp_priv->ev_tsk)) + return PTR_ERR(dp_priv->ev_tsk); + + return 0; } static irqreturn_t dp_display_irq_handler(int irq, void *dev_id) @@ -1091,7 +1162,7 @@ static irqreturn_t dp_display_irq_handler(int irq, void *dev_id) } if (hpd_isr_status & DP_DP_IRQ_HPD_INT_MASK) { - /* delete connect pending event first */ + /* stop sentinel connect pending checking */ dp_del_event(dp, EV_CONNECT_PENDING_TIMEOUT); dp_add_event(dp, EV_IRQ_HPD_INT, 0, 0); } @@ -1125,10 +1196,9 @@ int dp_display_request_irq(struct msm_dp *dp_display) dp = container_of(dp_display, struct dp_display_private, dp_display); dp->irq = irq_of_parse_and_map(dp->pdev->dev.of_node, 0); - if (dp->irq < 0) { - rc = dp->irq; - DRM_ERROR("failed to get irq: %d\n", rc); - return rc; + if (!dp->irq) { + DRM_ERROR("failed to get irq\n"); + return -EINVAL; } rc = devm_request_irq(&dp->pdev->dev, dp->irq, @@ -1167,8 +1237,11 @@ static int dp_display_probe(struct platform_device *pdev) return -EPROBE_DEFER; } + /* setup event q */ mutex_init(&dp->event_mutex); g_dp_display = &dp->dp_display; + init_waitqueue_head(&dp->event_q); + spin_lock_init(&dp->event_lock); /* Store DP audio handle inside DP display */ g_dp_display->dp_audio = dp->audio; @@ -1220,15 +1293,12 @@ static int dp_pm_resume(struct device *dev) dp_catalog_ctrl_hpd_config(dp->catalog); - status = dp_catalog_hpd_get_state_status(dp->catalog); + status = dp_catalog_link_is_connected(dp->catalog); - if (status) { + if (status) dp->dp_display.is_connected = true; - } else { + else dp->dp_display.is_connected = false; - /* make sure next resume host_init be called */ - dp->core_initialized = false; - } mutex_unlock(&dp->event_mutex); @@ -1250,6 +1320,9 @@ static int dp_pm_suspend(struct device *dev) dp->hpd_state = ST_SUSPENDED; + /* host_init will be called at pm_resume */ + dp->core_initialized = false; + mutex_unlock(&dp->event_mutex); return 0; @@ -1308,8 +1381,6 @@ void msm_dp_irq_postinstall(struct msm_dp *dp_display) dp = container_of(dp_display, struct dp_display_private, dp_display); - dp_hpd_event_setup(dp); - dp_add_event(dp, EV_HPD_INIT_SETUP, 0, 100); } @@ -1384,6 +1455,7 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) mutex_lock(&dp_display->event_mutex); + /* stop sentinel checking */ dp_del_event(dp_display, EV_CONNECT_PENDING_TIMEOUT); rc = dp_display_set_mode(dp, &dp_display->dp_mode); @@ -1402,7 +1474,7 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) state = dp_display->hpd_state; - if (state == ST_SUSPEND_PENDING) + if (state == ST_DISPLAY_OFF) dp_display_host_init(dp_display); dp_display_enable(dp_display, 0); @@ -1414,7 +1486,8 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) dp_display_unprepare(dp); } - if (state == ST_SUSPEND_PENDING) + /* manual kick off plug event to train link */ + if (state == ST_DISPLAY_OFF) dp_add_event(dp_display, EV_IRQ_HPD_INT, 0, 0); /* completed connection */ @@ -1446,6 +1519,7 @@ int msm_dp_display_disable(struct msm_dp *dp, struct drm_encoder *encoder) mutex_lock(&dp_display->event_mutex); + /* stop sentinel checking */ dp_del_event(dp_display, EV_DISCONNECT_PENDING_TIMEOUT); dp_display_disable(dp_display, 0); @@ -1459,7 +1533,7 @@ int msm_dp_display_disable(struct msm_dp *dp, struct drm_encoder *encoder) /* completed disconnection */ dp_display->hpd_state = ST_DISCONNECTED; } else { - dp_display->hpd_state = ST_SUSPEND_PENDING; + dp_display->hpd_state = ST_DISPLAY_OFF; } mutex_unlock(&dp_display->event_mutex); diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 2768d1d306f0..4e8a19114e87 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -196,6 +196,11 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, &panel->aux->ddc); if (!dp_panel->edid) { DRM_ERROR("panel edid read failed\n"); + /* check edid read fail is due to unplug */ + if (!dp_catalog_link_is_connected(panel->catalog)) { + rc = -ETIMEDOUT; + goto end; + } /* fail safe edid */ mutex_lock(&connector->dev->mode_config.mutex); diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 64454a63bbac..51e8318cc8ff 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1371,10 +1371,10 @@ static int dsi_cmds2buf_tx(struct msm_dsi_host *msm_host, dsi_get_bpp(msm_host->format) / 8; len = dsi_cmd_dma_add(msm_host, msg); - if (!len) { + if (len < 0) { pr_err("%s: failed to add cmd type = 0x%x\n", __func__, msg->type); - return -EINVAL; + return len; } /* for video mode, do not send cmds more than @@ -1393,10 +1393,14 @@ static int dsi_cmds2buf_tx(struct msm_dsi_host *msm_host, } ret = dsi_cmd_dma_tx(msm_host, len); - if (ret < len) { - pr_err("%s: cmd dma tx failed, type=0x%x, data0=0x%x, len=%d\n", - __func__, msg->type, (*(u8 *)(msg->tx_buf)), len); - return -ECOMM; + if (ret < 0) { + pr_err("%s: cmd dma tx failed, type=0x%x, data0=0x%x, len=%d, ret=%d\n", + __func__, msg->type, (*(u8 *)(msg->tx_buf)), len, ret); + return ret; + } else if (ret < len) { + pr_err("%s: cmd dma tx failed, type=0x%x, data0=0x%x, ret=%d len=%d\n", + __func__, msg->type, (*(u8 *)(msg->tx_buf)), ret, len); + return -EIO; } return len; @@ -2139,9 +2143,12 @@ int msm_dsi_host_cmd_rx(struct mipi_dsi_host *host, } ret = dsi_cmds2buf_tx(msm_host, msg); - if (ret < msg->tx_len) { + if (ret < 0) { pr_err("%s: Read cmd Tx failed, %d\n", __func__, ret); return ret; + } else if (ret < msg->tx_len) { + pr_err("%s: Read cmd Tx failed, too short: %d\n", __func__, ret); + return -ECOMM; } /* diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 94f948ef279d..28b33b35a30c 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -142,6 +142,10 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) /* HDCP needs physical address of hdmi register */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, config->mmio_name); + if (!res) { + ret = -EINVAL; + goto fail; + } hdmi->mmio_phy_addr = res->start; hdmi->qfprom_mmio = msm_ioremap(pdev, @@ -311,9 +315,9 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, } hdmi->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (hdmi->irq < 0) { - ret = hdmi->irq; - DRM_DEV_ERROR(dev->dev, "failed to get irq: %d\n", ret); + if (!hdmi->irq) { + ret = -EINVAL; + DRM_DEV_ERROR(dev->dev, "failed to get irq\n"); goto fail; } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index e37e5afc680a..087efcb1f34c 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 515ef80816a0..8c64ce7288f1 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -17,7 +17,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj) int npages = obj->size >> PAGE_SHIFT; if (WARN_ON(!msm_obj->pages)) /* should have already pinned! */ - return NULL; + return ERR_PTR(-ENOMEM); return drm_prime_pages_to_sg(obj->dev, msm_obj->pages, npages); } diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 50d881794758..2dcb7d1cff7c 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -58,7 +58,7 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, u64 addr = iova; unsigned int i; - for_each_sg(sgt->sgl, sg, sgt->nents, i) { + for_each_sgtable_sg(sgt, sg, i) { size_t size = sg->length; phys_addr_t phys = sg_phys(sg); diff --git a/drivers/gpu/drm/nouveau/dispnv50/atom.h b/drivers/gpu/drm/nouveau/dispnv50/atom.h index 3d82b3c67dec..93f8f4f64578 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/atom.h +++ b/drivers/gpu/drm/nouveau/dispnv50/atom.h @@ -160,14 +160,14 @@ nv50_head_atom_get(struct drm_atomic_state *state, struct drm_crtc *crtc) static inline struct drm_encoder * nv50_head_atom_get_encoder(struct nv50_head_atom *atom) { - struct drm_encoder *encoder = NULL; + struct drm_encoder *encoder; /* We only ever have a single encoder */ drm_for_each_encoder_mask(encoder, atom->state.crtc->dev, atom->state.encoder_mask) - break; + return encoder; - return encoder; + return NULL; } #define nv50_wndw_atom(p) container_of((p), struct nv50_wndw_atom, state) diff --git a/drivers/gpu/drm/nouveau/dispnv50/crc.c b/drivers/gpu/drm/nouveau/dispnv50/crc.c index 66f32d965c72..5624a716e11c 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/crc.c +++ b/drivers/gpu/drm/nouveau/dispnv50/crc.c @@ -411,9 +411,18 @@ void nv50_crc_atomic_check_outp(struct nv50_atom *atom) struct nv50_head_atom *armh = nv50_head_atom(old_crtc_state); struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state); struct nv50_outp_atom *outp_atom; - struct nouveau_encoder *outp = - nv50_real_outp(nv50_head_atom_get_encoder(armh)); - struct drm_encoder *encoder = &outp->base.base; + struct nouveau_encoder *outp; + struct drm_encoder *encoder, *enc; + + enc = nv50_head_atom_get_encoder(armh); + if (!enc) + continue; + + outp = nv50_real_outp(enc); + if (!outp) + continue; + + encoder = &outp->base.base; if (!asyh->clr.crc) continue; @@ -464,8 +473,16 @@ void nv50_crc_atomic_set(struct nv50_head *head, struct drm_device *dev = crtc->dev; struct nv50_crc *crc = &head->crc; const struct nv50_crc_func *func = nv50_disp(dev)->core->func->crc; - struct nouveau_encoder *outp = - nv50_real_outp(nv50_head_atom_get_encoder(asyh)); + struct nouveau_encoder *outp; + struct drm_encoder *encoder; + + encoder = nv50_head_atom_get_encoder(asyh); + if (!encoder) + return; + + outp = nv50_real_outp(encoder); + if (!outp) + return; func->set_src(head, outp->or, nv50_crc_source_type(outp, asyh->crc.src), diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 92987daa5e17..5e72e6cb2f84 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -679,7 +679,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, goto out_free_dma; for (i = 0; i < npages; i += max) { - args.end = start + (max << PAGE_SHIFT); + if (args.start + (max << PAGE_SHIFT) > end) + args.end = end; + else + args.end = args.start + (max << PAGE_SHIFT); + ret = migrate_vma_setup(&args); if (ret) goto out_free_pfns; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c index dc184e857f85..5214fd0658b8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c @@ -135,10 +135,10 @@ nvkm_cstate_find_best(struct nvkm_clk *clk, struct nvkm_pstate *pstate, list_for_each_entry_from_reverse(cstate, &pstate->list, head) { if (nvkm_cstate_valid(clk, cstate, max_volt, clk->temp)) - break; + return cstate; } - return cstate; + return NULL; } static struct nvkm_cstate * @@ -169,6 +169,8 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei) if (!list_empty(&pstate->list)) { cstate = nvkm_cstate_get(clk, pstate, cstatei); cstate = nvkm_cstate_find_best(clk, pstate, cstate); + if (!cstate) + return -EINVAL; } else { cstate = &pstate->base; } diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 959dcbd8a29c..bf2c845ef3a2 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -676,7 +676,7 @@ static const struct drm_display_mode ampire_am_1280800n3tzqw_t00h_mode = { static const struct panel_desc ampire_am_1280800n3tzqw_t00h = { .modes = &ire_am_1280800n3tzqw_t00h_mode, .num_modes = 1, - .bpc = 6, + .bpc = 8, .size = { .width = 217, .height = 136, @@ -2144,6 +2144,7 @@ static const struct panel_desc innolux_g070y2_l01 = { .unprepare = 800, }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index a70261809cdd..1dfc457bbefc 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -427,8 +427,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, if (args->retained) { if (args->madv == PANFROST_MADV_DONTNEED) - list_add_tail(&bo->base.madv_list, - &pfdev->shrinker_list); + list_move_tail(&bo->base.madv_list, + &pfdev->shrinker_list); else if (args->madv == PANFROST_MADV_WILLNEED) list_del_init(&bo->base.madv_list); } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 7a854beef257..8f7af65a69e4 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -484,7 +484,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, err_pages: drm_gem_shmem_put_pages(&bo->base); err_bo: - drm_gem_object_put(&bo->base.base); + panfrost_gem_mapping_put(bomapping); return ret; } diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index e30834434442..ef111d460be2 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -473,6 +473,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode native_mode->vdisplay != 0 && native_mode->clock != 0) { mode = drm_mode_duplicate(dev, native_mode); + if (!mode) + return NULL; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; drm_mode_set_name(mode); @@ -487,6 +489,8 @@ static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encode * simpler. */ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false); + if (!mode) + return NULL; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name); } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 0f23144491e4..91568f166a8a 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -2097,10 +2097,10 @@ static int vop_bind(struct device *dev, struct device *master, void *data) vop_win_init(vop); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - vop->len = resource_size(res); vop->regs = devm_ioremap_resource(dev, res); if (IS_ERR(vop->regs)) return PTR_ERR(vop->regs); + vop->len = resource_size(res); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (res) { diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index 62488ac14923..089c00a8e7d4 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -527,8 +527,8 @@ static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc) struct drm_device *ddev = crtc->dev; struct drm_connector_list_iter iter; struct drm_connector *connector = NULL; - struct drm_encoder *encoder = NULL; - struct drm_bridge *bridge = NULL; + struct drm_encoder *encoder = NULL, *en_iter; + struct drm_bridge *bridge = NULL, *br_iter; struct drm_display_mode *mode = &crtc->state->adjusted_mode; struct videomode vm; u32 hsync, vsync, accum_hbp, accum_vbp, accum_act_w, accum_act_h; @@ -538,15 +538,19 @@ static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc) int ret; /* get encoder from crtc */ - drm_for_each_encoder(encoder, ddev) - if (encoder->crtc == crtc) + drm_for_each_encoder(en_iter, ddev) + if (en_iter->crtc == crtc) { + encoder = en_iter; break; + } if (encoder) { /* get bridge from encoder */ - list_for_each_entry(bridge, &encoder->bridge_chain, chain_node) - if (bridge->encoder == encoder) + list_for_each_entry(br_iter, &encoder->bridge_chain, chain_node) + if (br_iter->encoder == encoder) { + bridge = br_iter; break; + } /* Get the connector from encoder */ drm_connector_list_iter_begin(ddev, &iter); diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 29861fc81b35..c5912fd53772 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -71,7 +71,6 @@ static int sun4i_drv_bind(struct device *dev) goto free_drm; } - dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->frontend_list); INIT_LIST_HEAD(&drv->engine_list); @@ -112,6 +111,8 @@ static int sun4i_drv_bind(struct device *dev) drm_fbdev_generic_setup(drm, 32); + dev_set_drvdata(dev, drm); + return 0; finish_poll: @@ -128,6 +129,7 @@ static void sun4i_drv_unbind(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); + dev_set_drvdata(dev, NULL); drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); diff --git a/drivers/gpu/drm/tilcdc/tilcdc_external.c b/drivers/gpu/drm/tilcdc/tilcdc_external.c index b177525588c1..8ece0dd0b469 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_external.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_external.c @@ -60,11 +60,13 @@ struct drm_connector *tilcdc_encoder_find_connector(struct drm_device *ddev, int tilcdc_add_component_encoder(struct drm_device *ddev) { struct tilcdc_drm_private *priv = ddev->dev_private; - struct drm_encoder *encoder; + struct drm_encoder *encoder = NULL, *iter; - list_for_each_entry(encoder, &ddev->mode_config.encoder_list, head) - if (encoder->possible_crtcs & (1 << priv->crtc->index)) + list_for_each_entry(iter, &ddev->mode_config.encoder_list, head) + if (iter->possible_crtcs & (1 << priv->crtc->index)) { + encoder = iter; break; + } if (!encoder) { dev_err(ddev->dev, "%s: No suitable encoder found\n", __func__); diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index ad691571d759..95fa6fc052a7 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -564,6 +564,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) struct vc4_hvs *hvs = NULL; int ret; u32 dispctrl; + u32 reg; hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL); if (!hvs) @@ -635,6 +636,26 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) vc4->hvs = hvs; + reg = HVS_READ(SCALER_DISPECTRL); + reg &= ~SCALER_DISPECTRL_DSP2_MUX_MASK; + HVS_WRITE(SCALER_DISPECTRL, + reg | VC4_SET_FIELD(0, SCALER_DISPECTRL_DSP2_MUX)); + + reg = HVS_READ(SCALER_DISPCTRL); + reg &= ~SCALER_DISPCTRL_DSP3_MUX_MASK; + HVS_WRITE(SCALER_DISPCTRL, + reg | VC4_SET_FIELD(3, SCALER_DISPCTRL_DSP3_MUX)); + + reg = HVS_READ(SCALER_DISPEOLN); + reg &= ~SCALER_DISPEOLN_DSP4_MUX_MASK; + HVS_WRITE(SCALER_DISPEOLN, + reg | VC4_SET_FIELD(3, SCALER_DISPEOLN_DSP4_MUX)); + + reg = HVS_READ(SCALER_DISPDITHER); + reg &= ~SCALER_DISPDITHER_DSP5_MUX_MASK; + HVS_WRITE(SCALER_DISPDITHER, + reg | VC4_SET_FIELD(3, SCALER_DISPDITHER_DSP5_MUX)); + dispctrl = HVS_READ(SCALER_DISPCTRL); dispctrl |= SCALER_DISPCTRL_ENABLE; @@ -642,10 +663,6 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) SCALER_DISPCTRL_DISPEIRQ(1) | SCALER_DISPCTRL_DISPEIRQ(2); - /* Set DSP3 (PV1) to use HVS channel 2, which would otherwise - * be unused. - */ - dispctrl &= ~SCALER_DISPCTRL_DSP3_MUX_MASK; dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ | SCALER_DISPCTRL_SLVWREIRQ | SCALER_DISPCTRL_SLVRDEIRQ | @@ -659,7 +676,6 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) SCALER_DISPCTRL_DSPEISLUR(1) | SCALER_DISPCTRL_DSPEISLUR(2) | SCALER_DISPCTRL_SCLEIRQ); - dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_DSP3_MUX); HVS_WRITE(SCALER_DISPCTRL, dispctrl); diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c index d13502ae973d..f8fa09dfea5d 100644 --- a/drivers/gpu/drm/vc4/vc4_txp.c +++ b/drivers/gpu/drm/vc4/vc4_txp.c @@ -295,12 +295,18 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn, if (WARN_ON(i == ARRAY_SIZE(drm_fmts))) return; - ctrl = TXP_GO | TXP_VSTART_AT_EOF | TXP_EI | + ctrl = TXP_GO | TXP_EI | VC4_SET_FIELD(0xf, TXP_BYTE_ENABLE) | VC4_SET_FIELD(txp_fmts[i], TXP_FORMAT); if (fb->format->has_alpha) ctrl |= TXP_ALPHA_ENABLE; + else + /* + * If TXP_ALPHA_ENABLE isn't set and TXP_ALPHA_INVERT is, the + * hardware will force the output padding to be 0xff. + */ + ctrl |= TXP_ALPHA_INVERT; gem = drm_fb_cma_get_gem_obj(fb, 0); TXP_WRITE(TXP_DST_PTR, gem->paddr + fb->offsets[0]); diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 3f7d5797dac8..6516f605c771 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -177,6 +177,8 @@ static int virtio_gpu_conn_get_modes(struct drm_connector *connector) DRM_DEBUG("add mode: %dx%d\n", width, height); mode = drm_cvt_mode(connector->dev, width, height, 60, false, false, false); + if (!mode) + return count; mode->type |= DRM_MODE_TYPE_PREFERRED; drm_mode_probed_add(connector, mode); count++; diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index 74ad8bf98bfd..e8c5e3ac9fff 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -347,6 +347,12 @@ static int bigben_probe(struct hid_device *hid, bigben->report = list_entry(report_list->next, struct hid_report, list); + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + error = -ENODEV; + goto error_hw_stop; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); set_bit(FF_RUMBLE, hidinput->input->ffbit); diff --git a/drivers/hid/hid-elan.c b/drivers/hid/hid-elan.c index 0e8f424025fe..838673303f77 100644 --- a/drivers/hid/hid-elan.c +++ b/drivers/hid/hid-elan.c @@ -188,7 +188,6 @@ static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi) ret = input_mt_init_slots(input, ELAN_MAX_FINGERS, INPUT_MT_POINTER); if (ret) { hid_err(hdev, "Failed to init elan MT slots: %d\n", ret); - input_free_device(input); return ret; } @@ -200,7 +199,6 @@ static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi) hid_err(hdev, "Failed to register elan input device: %d\n", ret); input_mt_destroy_slots(input); - input_free_device(input); return ret; } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 476cf05dc097..ad3b7a72ce55 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -743,6 +743,7 @@ #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5 +#define USB_DEVICE_ID_LENOVO_X12_TAB 0x60fe #define USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E 0x600e #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019 0x6019 diff --git a/drivers/hid/hid-led.c b/drivers/hid/hid-led.c index c2c66ceca132..7d82f8d426bb 100644 --- a/drivers/hid/hid-led.c +++ b/drivers/hid/hid-led.c @@ -366,7 +366,7 @@ static const struct hidled_config hidled_configs[] = { .type = DREAM_CHEEKY, .name = "Dream Cheeky Webmail Notifier", .short_name = "dream_cheeky", - .max_brightness = 31, + .max_brightness = 63, .num_leds = 1, .report_size = 9, .report_type = RAW_REQUEST, diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index e5a3704b9fe8..d686917cc3b1 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1990,6 +1990,12 @@ static const struct hid_device_id mt_devices[] = { USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) }, + /* Lenovo X12 TAB Gen 1 */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_LENOVO, + USB_DEVICE_ID_LENOVO_X12_TAB) }, + /* MosArt panels */ { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, MT_USB_DEVICE(USB_VENDOR_ID_ASUS, @@ -2129,6 +2135,9 @@ static const struct hid_device_id mt_devices[] = { { .driver_data = MT_CLS_GOOGLE, HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) }, + { .driver_data = MT_CLS_GOOGLE, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_GOOGLE, + USB_DEVICE_ID_GOOGLE_WHISKERS) }, /* Generic MT device */ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) }, diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 5dbb949b1afd..10188b1a6a08 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -606,6 +606,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) */ if (newchannel->offermsg.offer.sub_channel_index == 0) { mutex_unlock(&vmbus_connection.channel_mutex); + cpus_read_unlock(); /* * Don't call free_channel(), because newchannel->kobj * is not initialized yet. diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index b9ac357e465d..5d820037e291 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1351,7 +1351,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } - add_interrupt_randomness(hv_get_vector(), 0); + add_interrupt_randomness(hv_get_vector()); } /* diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c index a4ec85207782..2e6d6a5cffa1 100644 --- a/drivers/hwmon/ibmaem.c +++ b/drivers/hwmon/ibmaem.c @@ -550,7 +550,7 @@ static int aem_init_aem1_inst(struct aem_ipmi_data *probe, u8 module_handle) res = platform_device_add(data->pdev); if (res) - goto ipmi_err; + goto dev_add_err; platform_set_drvdata(data->pdev, data); @@ -598,7 +598,9 @@ static int aem_init_aem1_inst(struct aem_ipmi_data *probe, u8 module_handle) ipmi_destroy_user(data->ipmi.user); ipmi_err: platform_set_drvdata(data->pdev, NULL); - platform_device_unregister(data->pdev); + platform_device_del(data->pdev); +dev_add_err: + platform_device_put(data->pdev); dev_err: ida_simple_remove(&aem_ida, data->id); id_err: @@ -690,7 +692,7 @@ static int aem_init_aem2_inst(struct aem_ipmi_data *probe, res = platform_device_add(data->pdev); if (res) - goto ipmi_err; + goto dev_add_err; platform_set_drvdata(data->pdev, data); @@ -738,7 +740,9 @@ static int aem_init_aem2_inst(struct aem_ipmi_data *probe, ipmi_destroy_user(data->ipmi.user); ipmi_err: platform_set_drvdata(data->pdev, NULL); - platform_device_unregister(data->pdev); + platform_device_del(data->pdev); +dev_add_err: + platform_device_put(data->pdev); dev_err: ida_simple_remove(&aem_ida, data->id); id_err: diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index 4956a8159227..8686d47b2fff 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1376,7 +1376,7 @@ static int coresight_fixup_device_conns(struct coresight_device *csdev) continue; conn->child_dev = coresight_find_csdev_by_fwnode(conn->child_fwnode); - if (conn->child_dev) { + if (conn->child_dev && conn->child_dev->has_conns_grp) { ret = coresight_make_links(csdev, conn, conn->child_dev); if (ret) @@ -1568,6 +1568,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) int nr_refcnts = 1; atomic_t *refcnts = NULL; struct coresight_device *csdev; + bool registered = false; csdev = kzalloc(sizeof(*csdev), GFP_KERNEL); if (!csdev) { @@ -1588,7 +1589,8 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); if (!refcnts) { ret = -ENOMEM; - goto err_free_csdev; + kfree(csdev); + goto err_out; } csdev->refcnt = refcnts; @@ -1613,6 +1615,13 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev)); dev_set_name(&csdev->dev, "%s", desc->name); + /* + * Make sure the device registration and the connection fixup + * are synchronised, so that we don't see uninitialised devices + * on the coresight bus while trying to resolve the connections. + */ + mutex_lock(&coresight_mutex); + ret = device_register(&csdev->dev); if (ret) { put_device(&csdev->dev); @@ -1620,7 +1629,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) * All resources are free'd explicitly via * coresight_device_release(), triggered from put_device(). */ - goto err_out; + goto out_unlock; } if (csdev->type == CORESIGHT_DEV_TYPE_SINK || @@ -1635,11 +1644,11 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) * from put_device(), which is in turn called from * function device_unregister(). */ - goto err_out; + goto out_unlock; } } - - mutex_lock(&coresight_mutex); + /* Device is now registered */ + registered = true; ret = coresight_create_conns_sysfs_group(csdev); if (!ret) @@ -1649,16 +1658,18 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) if (!ret && cti_assoc_ops && cti_assoc_ops->add) cti_assoc_ops->add(csdev); +out_unlock: mutex_unlock(&coresight_mutex); - if (ret) { + /* Success */ + if (!ret) + return csdev; + + /* Unregister the device if needed */ + if (registered) { coresight_unregister(csdev); return ERR_PTR(ret); } - return csdev; - -err_free_csdev: - kfree(csdev); err_out: /* Cleanup the connection information */ coresight_release_platform_data(NULL, desc->pdata); diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index 2dcf13de751f..1e98562f4287 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -379,9 +379,10 @@ static int debug_notifier_call(struct notifier_block *self, int cpu; struct debug_drvdata *drvdata; - mutex_lock(&debug_lock); + /* Bail out if we can't acquire the mutex or the functionality is off */ + if (!mutex_trylock(&debug_lock)) + return NOTIFY_DONE; - /* Bail out if the functionality is disabled */ if (!debug_enable) goto skip_dump; @@ -400,7 +401,7 @@ static int debug_notifier_call(struct notifier_block *self, skip_dump: mutex_unlock(&debug_lock); - return 0; + return NOTIFY_DONE; } static struct notifier_block debug_notifier = { diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 66864f9cf7ac..7960fa4b8c5b 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -657,6 +657,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) unsigned int_addr_flag = 0; struct i2c_msg *m_start = msg; bool is_read; + u8 *dma_buf = NULL; dev_dbg(&adap->dev, "at91_xfer: processing %d messages:\n", num); @@ -704,7 +705,17 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) dev->msg = m_start; dev->recv_len_abort = false; + if (dev->use_dma) { + dma_buf = i2c_get_dma_safe_msg_buf(m_start, 1); + if (!dma_buf) { + ret = -ENOMEM; + goto out; + } + dev->buf = dma_buf; + } + ret = at91_do_twi_transfer(dev); + i2c_put_dma_safe_msg_buf(dma_buf, m_start, !ret); ret = (ret < 0) ? ret : num; out: diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index c1bbc4caeb5c..0abce487ead7 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -386,9 +386,9 @@ static irqreturn_t cdns_i2c_slave_isr(void *ptr) */ static irqreturn_t cdns_i2c_master_isr(void *ptr) { - unsigned int isr_status, avail_bytes, updatetx; + unsigned int isr_status, avail_bytes; unsigned int bytes_to_send; - bool hold_quirk; + bool updatetx; struct cdns_i2c *id = ptr; /* Signal completion only after everything is updated */ int done_flag = 0; @@ -408,11 +408,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) * Check if transfer size register needs to be updated again for a * large data receive operation. */ - updatetx = 0; - if (id->recv_count > id->curr_recv_count) - updatetx = 1; - - hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx; + updatetx = id->recv_count > id->curr_recv_count; /* When receiving, handle data interrupt and completion interrupt */ if (id->p_recv_buf && @@ -443,7 +439,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) break; } - if (cdns_is_holdquirk(id, hold_quirk)) + if (cdns_is_holdquirk(id, updatetx)) break; } @@ -454,7 +450,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) * maintain transfer size non-zero while performing a large * receive operation. */ - if (cdns_is_holdquirk(id, hold_quirk)) { + if (cdns_is_holdquirk(id, updatetx)) { /* wait while fifo is full */ while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) != (id->curr_recv_count - CDNS_I2C_FIFO_DEPTH)) @@ -476,22 +472,6 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) CDNS_I2C_XFER_SIZE_OFFSET); id->curr_recv_count = id->recv_count; } - } else if (id->recv_count && !hold_quirk && - !id->curr_recv_count) { - - /* Set the slave address in address register*/ - cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK, - CDNS_I2C_ADDR_OFFSET); - - if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) { - cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE; - } else { - cdns_i2c_writereg(id->recv_count, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = id->recv_count; - } } /* Clear hold (if not repeated start) and signal completion */ @@ -724,7 +704,7 @@ static void cdns_i2c_master_reset(struct i2c_adapter *adap) static int cdns_i2c_process_msg(struct cdns_i2c *id, struct i2c_msg *msg, struct i2c_adapter *adap) { - unsigned long time_left; + unsigned long time_left, msg_timeout; u32 reg; id->p_msg = msg; @@ -749,8 +729,16 @@ static int cdns_i2c_process_msg(struct cdns_i2c *id, struct i2c_msg *msg, else cdns_i2c_msend(id); + /* Minimal time to execute this message */ + msg_timeout = msecs_to_jiffies((1000 * msg->len * BITS_PER_BYTE) / id->i2c_clk); + /* Plus some wiggle room */ + msg_timeout += msecs_to_jiffies(500); + + if (msg_timeout < adap->timeout) + msg_timeout = adap->timeout; + /* Wait for the signal of completion */ - time_left = wait_for_completion_timeout(&id->xfer_done, adap->timeout); + time_left = wait_for_completion_timeout(&id->xfer_done, msg_timeout); if (time_left == 0) { cdns_i2c_master_reset(adap); dev_err(id->adap.dev.parent, @@ -1281,6 +1269,7 @@ static int cdns_i2c_probe(struct platform_device *pdev) return 0; err_clk_dis: + clk_notifier_unregister(id->clk, &id->clk_rate_change_nb); clk_disable_unprepare(id->clk); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index 3c19aada4b30..9468c6c89b3f 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -474,9 +474,6 @@ int i2c_dw_prepare_clk(struct dw_i2c_dev *dev, bool prepare) { int ret; - if (IS_ERR(dev->clk)) - return PTR_ERR(dev->clk); - if (prepare) { /* Optional interface clock */ ret = clk_prepare_enable(dev->pclk); diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 0dfeb2d11603..ad91c7c0faa5 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -266,8 +266,17 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) goto exit_reset; } - dev->clk = devm_clk_get(&pdev->dev, NULL); - if (!i2c_dw_prepare_clk(dev, true)) { + dev->clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(dev->clk)) { + ret = PTR_ERR(dev->clk); + goto exit_reset; + } + + ret = i2c_dw_prepare_clk(dev, true); + if (ret) + goto exit_reset; + + if (dev->clk) { u64 clk_khz; dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz; diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index a35a27c320e7..3d2d92640651 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -82,6 +82,7 @@ #define ISMT_DESC_ENTRIES 2 /* number of descriptor entries */ #define ISMT_MAX_RETRIES 3 /* number of SMBus retries to attempt */ +#define ISMT_LOG_ENTRIES 3 /* number of interrupt cause log entries */ /* Hardware Descriptor Constants - Control Field */ #define ISMT_DESC_CWRL 0x01 /* Command/Write Length */ @@ -175,6 +176,8 @@ struct ismt_priv { u8 head; /* ring buffer head pointer */ struct completion cmp; /* interrupt completion */ u8 buffer[I2C_SMBUS_BLOCK_MAX + 16]; /* temp R/W data buffer */ + dma_addr_t log_dma; + u32 *log; }; static const struct pci_device_id ismt_ids[] = { @@ -409,6 +412,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, memset(desc, 0, sizeof(struct ismt_desc)); desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write); + /* Always clear the log entries */ + memset(priv->log, 0, ISMT_LOG_ENTRIES * sizeof(u32)); + /* Initialize common control bits */ if (likely(pci_dev_msi_enabled(priv->pci_dev))) desc->control = ISMT_DESC_INT | ISMT_DESC_FAIR; @@ -693,6 +699,8 @@ static void ismt_hw_init(struct ismt_priv *priv) /* initialize the Master Descriptor Base Address (MDBA) */ writeq(priv->io_rng_dma, priv->smba + ISMT_MSTR_MDBA); + writeq(priv->log_dma, priv->smba + ISMT_GR_SMTICL); + /* initialize the Master Control Register (MCTRL) */ writel(ISMT_MCTRL_MEIE, priv->smba + ISMT_MSTR_MCTRL); @@ -780,6 +788,12 @@ static int ismt_dev_init(struct ismt_priv *priv) priv->head = 0; init_completion(&priv->cmp); + priv->log = dmam_alloc_coherent(&priv->pci_dev->dev, + ISMT_LOG_ENTRIES * sizeof(u32), + &priv->log_dma, GFP_KERNEL); + if (!priv->log) + return -ENOMEM; + return 0; } diff --git a/drivers/i2c/busses/i2c-mt7621.c b/drivers/i2c/busses/i2c-mt7621.c index 45fe4a7fe0c0..901f0fb04fee 100644 --- a/drivers/i2c/busses/i2c-mt7621.c +++ b/drivers/i2c/busses/i2c-mt7621.c @@ -304,7 +304,8 @@ static int mtk_i2c_probe(struct platform_device *pdev) if (i2c->bus_freq == 0) { dev_warn(i2c->dev, "clock-frequency 0 not supported\n"); - return -EINVAL; + ret = -EINVAL; + goto err_disable_clk; } adap = &i2c->adap; @@ -322,10 +323,15 @@ static int mtk_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(adap); if (ret < 0) - return ret; + goto err_disable_clk; dev_info(&pdev->dev, "clock %u kHz\n", i2c->bus_freq / 1000); + return 0; + +err_disable_clk: + clk_disable_unprepare(i2c->clk); + return ret; } diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 2ad166355ec9..d9ac62c1ac25 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -359,14 +359,14 @@ static int npcm_i2c_get_SCL(struct i2c_adapter *_adap) { struct npcm_i2c *bus = container_of(_adap, struct npcm_i2c, adap); - return !!(I2CCTL3_SCL_LVL & ioread32(bus->reg + NPCM_I2CCTL3)); + return !!(I2CCTL3_SCL_LVL & ioread8(bus->reg + NPCM_I2CCTL3)); } static int npcm_i2c_get_SDA(struct i2c_adapter *_adap) { struct npcm_i2c *bus = container_of(_adap, struct npcm_i2c, adap); - return !!(I2CCTL3_SDA_LVL & ioread32(bus->reg + NPCM_I2CCTL3)); + return !!(I2CCTL3_SDA_LVL & ioread8(bus->reg + NPCM_I2CCTL3)); } static inline u16 npcm_i2c_get_index(struct npcm_i2c *bus) @@ -563,6 +563,15 @@ static inline void npcm_i2c_nack(struct npcm_i2c *bus) iowrite8(val, bus->reg + NPCM_I2CCTL1); } +static inline void npcm_i2c_clear_master_status(struct npcm_i2c *bus) +{ + u8 val; + + /* Clear NEGACK, STASTR and BER bits */ + val = NPCM_I2CST_BER | NPCM_I2CST_NEGACK | NPCM_I2CST_STASTR; + iowrite8(val, bus->reg + NPCM_I2CST); +} + #if IS_ENABLED(CONFIG_I2C_SLAVE) static void npcm_i2c_slave_int_enable(struct npcm_i2c *bus, bool enable) { @@ -642,8 +651,8 @@ static void npcm_i2c_reset(struct npcm_i2c *bus) iowrite8(NPCM_I2CCST_BB, bus->reg + NPCM_I2CCST); iowrite8(0xFF, bus->reg + NPCM_I2CST); - /* Clear EOB bit */ - iowrite8(NPCM_I2CCST3_EO_BUSY, bus->reg + NPCM_I2CCST3); + /* Clear and disable EOB */ + npcm_i2c_eob_int(bus, false); /* Clear all fifo bits: */ iowrite8(NPCM_I2CFIF_CTS_CLR_FIFO, bus->reg + NPCM_I2CFIF_CTS); @@ -655,6 +664,9 @@ static void npcm_i2c_reset(struct npcm_i2c *bus) } #endif + /* clear status bits for spurious interrupts */ + npcm_i2c_clear_master_status(bus); + bus->state = I2C_IDLE; } @@ -815,15 +827,6 @@ static void npcm_i2c_read_fifo(struct npcm_i2c *bus, u8 bytes_in_fifo) } } -static inline void npcm_i2c_clear_master_status(struct npcm_i2c *bus) -{ - u8 val; - - /* Clear NEGACK, STASTR and BER bits */ - val = NPCM_I2CST_BER | NPCM_I2CST_NEGACK | NPCM_I2CST_STASTR; - iowrite8(val, bus->reg + NPCM_I2CST); -} - static void npcm_i2c_master_abort(struct npcm_i2c *bus) { /* Only current master is allowed to issue a stop condition */ @@ -1231,7 +1234,16 @@ static irqreturn_t npcm_i2c_int_slave_handler(struct npcm_i2c *bus) ret = IRQ_HANDLED; } /* SDAST */ - return ret; + /* + * if irq is not one of the above, make sure EOB is disabled and all + * status bits are cleared. + */ + if (ret == IRQ_NONE) { + npcm_i2c_eob_int(bus, false); + npcm_i2c_clear_master_status(bus); + } + + return IRQ_HANDLED; } static int npcm_i2c_reg_slave(struct i2c_client *client) @@ -1467,6 +1479,9 @@ static void npcm_i2c_irq_handle_nack(struct npcm_i2c *bus) npcm_i2c_eob_int(bus, false); npcm_i2c_master_stop(bus); + /* Clear SDA Status bit (by reading dummy byte) */ + npcm_i2c_rd_byte(bus); + /* * The bus is released from stall only after the SW clears * NEGACK bit. Then a Stop condition is sent. @@ -1474,6 +1489,8 @@ static void npcm_i2c_irq_handle_nack(struct npcm_i2c *bus) npcm_i2c_clear_master_status(bus); readx_poll_timeout_atomic(ioread8, bus->reg + NPCM_I2CCST, val, !(val & NPCM_I2CCST_BUSY), 10, 200); + /* verify no status bits are still set after bus is released */ + npcm_i2c_clear_master_status(bus); } bus->state = I2C_IDLE; @@ -1672,10 +1689,10 @@ static int npcm_i2c_recovery_tgclk(struct i2c_adapter *_adap) int iter = 27; if ((npcm_i2c_get_SDA(_adap) == 1) && (npcm_i2c_get_SCL(_adap) == 1)) { - dev_dbg(bus->dev, "bus%d recovery skipped, bus not stuck", - bus->num); + dev_dbg(bus->dev, "bus%d-0x%x recovery skipped, bus not stuck", + bus->num, bus->dest_addr); npcm_i2c_reset(bus); - return status; + return 0; } npcm_i2c_int_enable(bus, false); @@ -1909,6 +1926,7 @@ static int npcm_i2c_init_module(struct npcm_i2c *bus, enum i2c_mode mode, bus_freq_hz < I2C_FREQ_MIN_HZ || bus_freq_hz > I2C_FREQ_MAX_HZ) return -EINVAL; + npcm_i2c_int_enable(bus, false); npcm_i2c_disable(bus); /* Configure FIFO mode : */ @@ -1937,10 +1955,17 @@ static int npcm_i2c_init_module(struct npcm_i2c *bus, enum i2c_mode mode, val = (val | NPCM_I2CCTL1_NMINTE) & ~NPCM_I2CCTL1_RWS; iowrite8(val, bus->reg + NPCM_I2CCTL1); - npcm_i2c_int_enable(bus, true); - npcm_i2c_reset(bus); + /* check HW is OK: SDA and SCL should be high at this point. */ + if ((npcm_i2c_get_SDA(&bus->adap) == 0) || (npcm_i2c_get_SCL(&bus->adap) == 0)) { + dev_err(bus->dev, "I2C%d init fail: lines are low\n", bus->num); + dev_err(bus->dev, "SDA=%d SCL=%d\n", npcm_i2c_get_SDA(&bus->adap), + npcm_i2c_get_SCL(&bus->adap)); + return -ENXIO; + } + + npcm_i2c_int_enable(bus, true); return 0; } @@ -1988,10 +2013,14 @@ static irqreturn_t npcm_i2c_bus_irq(int irq, void *dev_id) #if IS_ENABLED(CONFIG_I2C_SLAVE) if (bus->slave) { bus->master_or_slave = I2C_SLAVE; - return npcm_i2c_int_slave_handler(bus); + if (npcm_i2c_int_slave_handler(bus)) + return IRQ_HANDLED; } #endif - return IRQ_NONE; + /* clear status bits for spurious interrupts */ + npcm_i2c_clear_master_status(bus); + + return IRQ_HANDLED; } static bool npcm_i2c_master_start_xmit(struct npcm_i2c *bus, @@ -2047,8 +2076,7 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, u16 nwrite, nread; u8 *write_data, *read_data; u8 slave_addr; - int timeout; - int ret = 0; + unsigned long timeout; bool read_block = false; bool read_PEC = false; u8 bus_busy; @@ -2099,13 +2127,13 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, * 9: bits per transaction (including the ack/nack) */ timeout_usec = (2 * 9 * USEC_PER_SEC / bus->bus_freq) * (2 + nread + nwrite); - timeout = max(msecs_to_jiffies(35), usecs_to_jiffies(timeout_usec)); + timeout = max_t(unsigned long, bus->adap.timeout, usecs_to_jiffies(timeout_usec)); if (nwrite >= 32 * 1024 || nread >= 32 * 1024) { dev_err(bus->dev, "i2c%d buffer too big\n", bus->num); return -EINVAL; } - time_left = jiffies + msecs_to_jiffies(DEFAULT_STALL_COUNT) + 1; + time_left = jiffies + timeout + 1; do { /* * we must clear slave address immediately when the bus is not @@ -2138,12 +2166,12 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, bus->read_block_use = read_block; reinit_completion(&bus->cmd_complete); - if (!npcm_i2c_master_start_xmit(bus, slave_addr, nwrite, nread, - write_data, read_data, read_PEC, - read_block)) - ret = -EBUSY; - if (ret != -EBUSY) { + npcm_i2c_int_enable(bus, true); + + if (npcm_i2c_master_start_xmit(bus, slave_addr, nwrite, nread, + write_data, read_data, read_PEC, + read_block)) { time_left = wait_for_completion_timeout(&bus->cmd_complete, timeout); @@ -2157,26 +2185,31 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, } } } - ret = bus->cmd_err; /* if there was BER, check if need to recover the bus: */ if (bus->cmd_err == -EAGAIN) - ret = i2c_recover_bus(adap); + bus->cmd_err = i2c_recover_bus(adap); /* * After any type of error, check if LAST bit is still set, * due to a HW issue. * It cannot be cleared without resetting the module. */ - if (bus->cmd_err && - (NPCM_I2CRXF_CTL_LAST_PEC & ioread8(bus->reg + NPCM_I2CRXF_CTL))) + else if (bus->cmd_err && + (NPCM_I2CRXF_CTL_LAST_PEC & ioread8(bus->reg + NPCM_I2CRXF_CTL))) npcm_i2c_reset(bus); + /* after any xfer, successful or not, stall and EOB must be disabled */ + npcm_i2c_stall_after_start(bus, false); + npcm_i2c_eob_int(bus, false); + #if IS_ENABLED(CONFIG_I2C_SLAVE) /* reenable slave if it was enabled */ if (bus->slave) iowrite8((bus->slave->addr & 0x7F) | NPCM_I2CADDR_SAEN, bus->reg + NPCM_I2CADDR1); +#else + npcm_i2c_int_enable(bus, false); #endif return bus->cmd_err; } @@ -2269,7 +2302,7 @@ static int npcm_i2c_probe_bus(struct platform_device *pdev) adap = &bus->adap; adap->owner = THIS_MODULE; adap->retries = 3; - adap->timeout = HZ; + adap->timeout = msecs_to_jiffies(35); adap->algo = &npcm_i2c_algo; adap->quirks = &npcm_i2c_quirks; adap->algo_data = bus; @@ -2336,8 +2369,7 @@ static struct platform_driver npcm_i2c_bus_driver = { static int __init npcm_i2c_init(void) { npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL); - platform_driver_register(&npcm_i2c_bus_driver); - return 0; + return platform_driver_register(&npcm_i2c_bus_driver); } module_init(npcm_i2c_init); diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 8722ca23f889..6a7a7a074a97 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -999,8 +999,10 @@ static int rcar_i2c_probe(struct platform_device *pdev) pm_runtime_enable(dev); pm_runtime_get_sync(dev); ret = rcar_i2c_clock_calculate(priv); - if (ret < 0) - goto out_pm_put; + if (ret < 0) { + pm_runtime_put(dev); + goto out_pm_disable; + } rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ @@ -1029,19 +1031,19 @@ static int rcar_i2c_probe(struct platform_device *pdev) ret = platform_get_irq(pdev, 0); if (ret < 0) - goto out_pm_disable; + goto out_pm_put; priv->irq = ret; ret = devm_request_irq(dev, priv->irq, irqhandler, irqflags, dev_name(dev), priv); if (ret < 0) { dev_err(dev, "cannot get irq %d\n", priv->irq); - goto out_pm_disable; + goto out_pm_put; } platform_set_drvdata(pdev, priv); ret = i2c_add_numbered_adapter(adap); if (ret < 0) - goto out_pm_disable; + goto out_pm_put; if (priv->flags & ID_P_HOST_NOTIFY) { priv->host_notify_client = i2c_new_slave_host_notify_device(adap); @@ -1058,7 +1060,8 @@ static int rcar_i2c_probe(struct platform_device *pdev) out_del_device: i2c_del_adapter(&priv->adap); out_pm_put: - pm_runtime_put(dev); + if (priv->flags & ID_P_PM_BLOCKED) + pm_runtime_put(dev); out_pm_disable: pm_runtime_disable(dev); return ret; diff --git a/drivers/i2c/busses/i2c-thunderx-pcidrv.c b/drivers/i2c/busses/i2c-thunderx-pcidrv.c index 12c90aa0900e..a77cd86fe75e 100644 --- a/drivers/i2c/busses/i2c-thunderx-pcidrv.c +++ b/drivers/i2c/busses/i2c-thunderx-pcidrv.c @@ -213,6 +213,7 @@ static int thunder_i2c_probe_pci(struct pci_dev *pdev, i2c->adap.bus_recovery_info = &octeon_i2c_recovery_info; i2c->adap.dev.parent = dev; i2c->adap.dev.of_node = pdev->dev.of_node; + i2c->adap.dev.fwnode = dev->fwnode; snprintf(i2c->adap.name, sizeof(i2c->adap.name), "Cavium ThunderX i2c adapter at %s", dev_name(dev)); i2c_set_adapdata(&i2c->adap, i2c); diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index d79335506ecd..b92b032fb6d1 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -47,11 +47,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -93,6 +95,12 @@ static unsigned int mwait_substates __initdata; */ #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) +/* + * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE + * above. + */ +#define CPUIDLE_FLAG_IBRS BIT(16) + /* * MWAIT takes an 8-bit "hint" in EAX "suggesting" * the C-state (top nibble) and sub-state (bottom nibble) @@ -132,6 +140,24 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, return index; } +static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + bool smt_active = sched_smt_active(); + u64 spec_ctrl = spec_ctrl_current(); + int ret; + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + + ret = intel_idle(dev, drv, index); + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); + + return ret; +} + /** * intel_idle_s2idle - Ask the processor to enter the given idle state. * @dev: cpuidle device of the target CPU. @@ -653,7 +679,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, .target_residency = 200, .enter = &intel_idle, @@ -661,7 +687,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C7s", .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, .target_residency = 800, .enter = &intel_idle, @@ -669,7 +695,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C8", .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle, @@ -677,7 +703,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C9", .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, .target_residency = 5000, .enter = &intel_idle, @@ -685,7 +711,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C10", .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, .target_residency = 5000, .enter = &intel_idle, @@ -714,7 +740,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 133, .target_residency = 600, .enter = &intel_idle, @@ -1501,6 +1527,11 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) /* Structure copy. */ drv->states[drv->state_count] = cpuidle_state_table[cstate]; + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && + cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { + drv->states[drv->state_count].enter = intel_idle_ibrs; + } + if ((disabled_states_mask & BIT(drv->state_count)) || ((icpu->use_acpi || force_use_acpi) && intel_idle_off_by_default(mwait_hint) && diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index da56488182d0..6aa5a72c89b2 100644 --- a/drivers/iio/accel/bma180.c +++ b/drivers/iio/accel/bma180.c @@ -1068,11 +1068,12 @@ static int bma180_probe(struct i2c_client *client, data->trig->dev.parent = dev; data->trig->ops = &bma180_trigger_ops; iio_trigger_set_drvdata(data->trig, indio_dev); - indio_dev->trig = iio_trigger_get(data->trig); ret = iio_trigger_register(data->trig); if (ret) goto err_trigger_free; + + indio_dev->trig = iio_trigger_get(data->trig); } ret = iio_triggered_buffer_setup(indio_dev, NULL, diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index e7e280282774..b12e80464706 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1496,10 +1496,14 @@ static int mma8452_reset(struct i2c_client *client) int i; int ret; - ret = i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2, + /* + * Find on fxls8471, after config reset bit, it reset immediately, + * and will not give ACK, so here do not check the return value. + * The following code will read the reset register, and check whether + * this reset works. + */ + i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2, MMA8452_CTRL_REG2_RST); - if (ret < 0) - return ret; for (i = 0; i < 10; i++) { usleep_range(100, 200); @@ -1542,11 +1546,13 @@ static int mma8452_probe(struct i2c_client *client, mutex_init(&data->lock); data->chip_info = device_get_match_data(&client->dev); - if (!data->chip_info && id) { - data->chip_info = &mma_chip_info_table[id->driver_data]; - } else { - dev_err(&client->dev, "unknown device model\n"); - return -ENODEV; + if (!data->chip_info) { + if (id) { + data->chip_info = &mma_chip_info_table[id->driver_data]; + } else { + dev_err(&client->dev, "unknown device model\n"); + return -ENODEV; + } } data->vdd_reg = devm_regulator_get(&client->dev, "vdd"); diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c index 5a2b0ffbb145..ecd9d8ad5928 100644 --- a/drivers/iio/accel/mxc4005.c +++ b/drivers/iio/accel/mxc4005.c @@ -461,8 +461,6 @@ static int mxc4005_probe(struct i2c_client *client, data->dready_trig->dev.parent = &client->dev; data->dready_trig->ops = &mxc4005_trigger_ops; iio_trigger_set_drvdata(data->dready_trig, indio_dev); - indio_dev->trig = data->dready_trig; - iio_trigger_get(indio_dev->trig); ret = devm_iio_trigger_register(&client->dev, data->dready_trig); if (ret) { @@ -470,6 +468,8 @@ static int mxc4005_probe(struct i2c_client *client, "failed to register trigger\n"); return ret; } + + indio_dev->trig = iio_trigger_get(data->dready_trig); } return devm_iio_device_register(&client->dev, indio_dev); diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index bd3500995037..19ab7d7251bc 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -170,7 +170,6 @@ static const struct iio_chan_spec ad7124_channel_template = { .sign = 'u', .realbits = 24, .storagebits = 32, - .shift = 8, .endianness = IIO_BE, }, }; diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index 9109da2d2e15..cbe1011a2408 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -334,16 +334,19 @@ static struct adi_axi_adc_client *adi_axi_adc_attach_client(struct device *dev) if (!try_module_get(cl->dev->driver->owner)) { mutex_unlock(®istered_clients_lock); + of_node_put(cln); return ERR_PTR(-ENODEV); } get_device(cl->dev); cl->info = info; mutex_unlock(®istered_clients_lock); + of_node_put(cln); return cl; } mutex_unlock(®istered_clients_lock); + of_node_put(cln); return ERR_PTR(-EPROBE_DEFER); } diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 5f5e8b39e4d2..84dbe9e2f0ef 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -196,6 +196,14 @@ static const struct dmi_system_id axp288_adc_ts_bias_override[] = { }, .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA, }, + { + /* Nuvision Solo 10 Draw */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TMAX"), + DMI_MATCH(DMI_PRODUCT_NAME, "TM101W610L"), + }, + .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA, + }, {} }; diff --git a/drivers/iio/adc/sc27xx_adc.c b/drivers/iio/adc/sc27xx_adc.c index aa32a1f385e2..2b463e1cf1c7 100644 --- a/drivers/iio/adc/sc27xx_adc.c +++ b/drivers/iio/adc/sc27xx_adc.c @@ -36,8 +36,8 @@ /* Bits and mask definition for SC27XX_ADC_CH_CFG register */ #define SC27XX_ADC_CHN_ID_MASK GENMASK(4, 0) -#define SC27XX_ADC_SCALE_MASK GENMASK(10, 8) -#define SC27XX_ADC_SCALE_SHIFT 8 +#define SC27XX_ADC_SCALE_MASK GENMASK(10, 9) +#define SC27XX_ADC_SCALE_SHIFT 9 /* Bits definitions for SC27XX_ADC_INT_EN registers */ #define SC27XX_ADC_IRQ_EN BIT(0) @@ -103,14 +103,14 @@ static struct sc27xx_adc_linear_graph small_scale_graph = { 100, 341, }; -static const struct sc27xx_adc_linear_graph big_scale_graph_calib = { - 4200, 856, - 3600, 733, +static const struct sc27xx_adc_linear_graph sc2731_big_scale_graph_calib = { + 4200, 850, + 3600, 728, }; -static const struct sc27xx_adc_linear_graph small_scale_graph_calib = { - 1000, 833, - 100, 80, +static const struct sc27xx_adc_linear_graph sc2731_small_scale_graph_calib = { + 1000, 838, + 100, 84, }; static int sc27xx_adc_get_calib_data(u32 calib_data, int calib_adc) @@ -130,11 +130,11 @@ static int sc27xx_adc_scale_calibration(struct sc27xx_adc_data *data, size_t len; if (big_scale) { - calib_graph = &big_scale_graph_calib; + calib_graph = &sc2731_big_scale_graph_calib; graph = &big_scale_graph; cell_name = "big_scale_calib"; } else { - calib_graph = &small_scale_graph_calib; + calib_graph = &sc2731_small_scale_graph_calib; graph = &small_scale_graph; cell_name = "small_scale_calib"; } diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index a83199b212a4..20fc867e3998 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -64,6 +64,7 @@ struct stm32_adc_priv; * @max_clk_rate_hz: maximum analog clock rate (Hz, from datasheet) * @has_syscfg: SYSCFG capability flags * @num_irqs: number of interrupt lines + * @num_adcs: maximum number of ADC instances in the common registers */ struct stm32_adc_priv_cfg { const struct stm32_adc_common_regs *regs; @@ -71,6 +72,7 @@ struct stm32_adc_priv_cfg { u32 max_clk_rate_hz; unsigned int has_syscfg; unsigned int num_irqs; + unsigned int num_adcs; }; /** @@ -333,7 +335,7 @@ static void stm32_adc_irq_handler(struct irq_desc *desc) * before invoking the interrupt handler (e.g. call ISR only for * IRQ-enabled ADCs). */ - for (i = 0; i < priv->cfg->num_irqs; i++) { + for (i = 0; i < priv->cfg->num_adcs; i++) { if ((status & priv->cfg->regs->eoc_msk[i] && stm32_adc_eoc_enabled(priv, i)) || (status & priv->cfg->regs->ovr_msk[i])) @@ -784,6 +786,7 @@ static const struct stm32_adc_priv_cfg stm32f4_adc_priv_cfg = { .clk_sel = stm32f4_adc_clk_sel, .max_clk_rate_hz = 36000000, .num_irqs = 1, + .num_adcs = 3, }; static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = { @@ -792,14 +795,16 @@ static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = { .max_clk_rate_hz = 36000000, .has_syscfg = HAS_VBOOSTER, .num_irqs = 1, + .num_adcs = 2, }; static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = { .regs = &stm32h7_adc_common_regs, .clk_sel = stm32h7_adc_clk_sel, - .max_clk_rate_hz = 40000000, + .max_clk_rate_hz = 36000000, .has_syscfg = HAS_VBOOSTER | HAS_ANASWVDD, .num_irqs = 2, + .num_adcs = 2, }; static const struct of_device_id stm32_adc_of_match[] = { diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 9939dee01743..e60ad48196ff 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1265,7 +1265,6 @@ static irqreturn_t stm32_adc_threaded_isr(int irq, void *data) struct stm32_adc *adc = iio_priv(indio_dev); const struct stm32_adc_regspec *regs = adc->cfg->regs; u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg); - u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg); /* Check ovr status right now, as ovr mask should be already disabled */ if (status & regs->isr_ovr.mask) { @@ -1280,11 +1279,6 @@ static irqreturn_t stm32_adc_threaded_isr(int irq, void *data) return IRQ_HANDLED; } - if (!(status & mask)) - dev_err_ratelimited(&indio_dev->dev, - "Unexpected IRQ: IER=0x%08x, ISR=0x%08x\n", - mask, status); - return IRQ_NONE; } @@ -1294,10 +1288,6 @@ static irqreturn_t stm32_adc_isr(int irq, void *data) struct stm32_adc *adc = iio_priv(indio_dev); const struct stm32_adc_regspec *regs = adc->cfg->regs; u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg); - u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg); - - if (!(status & mask)) - return IRQ_WAKE_THREAD; if (status & regs->isr_ovr.mask) { /* diff --git a/drivers/iio/adc/stmpe-adc.c b/drivers/iio/adc/stmpe-adc.c index fba659bfdb40..64305d9fa560 100644 --- a/drivers/iio/adc/stmpe-adc.c +++ b/drivers/iio/adc/stmpe-adc.c @@ -61,7 +61,7 @@ struct stmpe_adc { static int stmpe_read_voltage(struct stmpe_adc *info, struct iio_chan_spec const *chan, int *val) { - long ret; + unsigned long ret; mutex_lock(&info->lock); @@ -79,7 +79,7 @@ static int stmpe_read_voltage(struct stmpe_adc *info, ret = wait_for_completion_timeout(&info->completion, STMPE_ADC_TIMEOUT); - if (ret <= 0) { + if (ret == 0) { stmpe_reg_write(info->stmpe, STMPE_REG_ADC_INT_STA, STMPE_ADC_CH(info->channel)); mutex_unlock(&info->lock); @@ -96,7 +96,7 @@ static int stmpe_read_voltage(struct stmpe_adc *info, static int stmpe_read_temp(struct stmpe_adc *info, struct iio_chan_spec const *chan, int *val) { - long ret; + unsigned long ret; mutex_lock(&info->lock); @@ -114,7 +114,7 @@ static int stmpe_read_temp(struct stmpe_adc *info, ret = wait_for_completion_timeout(&info->completion, STMPE_ADC_TIMEOUT); - if (ret <= 0) { + if (ret == 0) { mutex_unlock(&info->lock); return -ETIMEDOUT; } diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c index 60dd87e96f5f..384d167b4fd6 100644 --- a/drivers/iio/chemical/ccs811.c +++ b/drivers/iio/chemical/ccs811.c @@ -500,11 +500,11 @@ static int ccs811_probe(struct i2c_client *client, data->drdy_trig->dev.parent = &client->dev; data->drdy_trig->ops = &ccs811_trigger_ops; iio_trigger_set_drvdata(data->drdy_trig, indio_dev); - indio_dev->trig = data->drdy_trig; - iio_trigger_get(indio_dev->trig); ret = iio_trigger_register(data->drdy_trig); if (ret) goto err_poweroff; + + indio_dev->trig = iio_trigger_get(data->drdy_trig); } ret = iio_triggered_buffer_setup(indio_dev, NULL, diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 7a69c1be7393..56206fdbceb9 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -70,16 +70,18 @@ static int st_sensors_match_odr(struct st_sensor_settings *sensor_settings, int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr) { - int err; + int err = 0; struct st_sensor_odr_avl odr_out = {0, 0}; struct st_sensor_data *sdata = iio_priv(indio_dev); + mutex_lock(&sdata->odr_lock); + if (!sdata->sensor_settings->odr.mask) - return 0; + goto unlock_mutex; err = st_sensors_match_odr(sdata->sensor_settings, odr, &odr_out); if (err < 0) - goto st_sensors_match_odr_error; + goto unlock_mutex; if ((sdata->sensor_settings->odr.addr == sdata->sensor_settings->pw.addr) && @@ -102,7 +104,9 @@ int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr) if (err >= 0) sdata->odr = odr_out.hz; -st_sensors_match_odr_error: +unlock_mutex: + mutex_unlock(&sdata->odr_lock); + return err; } EXPORT_SYMBOL(st_sensors_set_odr); @@ -364,6 +368,8 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, struct st_sensors_platform_data *of_pdata; int err = 0; + mutex_init(&sdata->odr_lock); + /* If OF/DT pdata exists, it will take precedence of anything else */ of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata); if (IS_ERR(of_pdata)) @@ -557,18 +563,24 @@ int st_sensors_read_info_raw(struct iio_dev *indio_dev, err = -EBUSY; goto out; } else { + mutex_lock(&sdata->odr_lock); err = st_sensors_set_enable(indio_dev, true); - if (err < 0) + if (err < 0) { + mutex_unlock(&sdata->odr_lock); goto out; + } msleep((sdata->sensor_settings->bootime * 1000) / sdata->odr); err = st_sensors_read_axis_data(indio_dev, ch, val); - if (err < 0) + if (err < 0) { + mutex_unlock(&sdata->odr_lock); goto out; + } *val = *val >> ch->scan_type.shift; err = st_sensors_set_enable(indio_dev, false); + mutex_unlock(&sdata->odr_lock); } out: mutex_unlock(&indio_dev->mlock); diff --git a/drivers/iio/dummy/iio_simple_dummy.c b/drivers/iio/dummy/iio_simple_dummy.c index c0b7ef900735..c24f609c2ade 100644 --- a/drivers/iio/dummy/iio_simple_dummy.c +++ b/drivers/iio/dummy/iio_simple_dummy.c @@ -575,10 +575,9 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) */ swd = kzalloc(sizeof(*swd), GFP_KERNEL); - if (!swd) { - ret = -ENOMEM; - goto error_kzalloc; - } + if (!swd) + return ERR_PTR(-ENOMEM); + /* * Allocate an IIO device. * @@ -590,7 +589,7 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) indio_dev = iio_device_alloc(parent, sizeof(*st)); if (!indio_dev) { ret = -ENOMEM; - goto error_ret; + goto error_free_swd; } st = iio_priv(indio_dev); @@ -616,6 +615,10 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) * indio_dev->name = spi_get_device_id(spi)->name; */ indio_dev->name = kstrdup(name, GFP_KERNEL); + if (!indio_dev->name) { + ret = -ENOMEM; + goto error_free_device; + } /* Provide description of available channels */ indio_dev->channels = iio_dummy_channels; @@ -632,7 +635,7 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) ret = iio_simple_dummy_events_register(indio_dev); if (ret < 0) - goto error_free_device; + goto error_free_name; ret = iio_simple_dummy_configure_buffer(indio_dev); if (ret < 0) @@ -649,11 +652,12 @@ static struct iio_sw_device *iio_dummy_probe(const char *name) iio_simple_dummy_unconfigure_buffer(indio_dev); error_unregister_events: iio_simple_dummy_events_unregister(indio_dev); +error_free_name: + kfree(indio_dev->name); error_free_device: iio_device_free(indio_dev); -error_ret: +error_free_swd: kfree(swd); -error_kzalloc: return ERR_PTR(ret); } diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index 39e1c4306c47..84c6ad4bcccb 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -872,6 +872,7 @@ static int mpu3050_power_up(struct mpu3050 *mpu3050) ret = regmap_update_bits(mpu3050->map, MPU3050_PWR_MGM, MPU3050_PWR_MGM_SLEEP, 0); if (ret) { + regulator_bulk_disable(ARRAY_SIZE(mpu3050->regs), mpu3050->regs); dev_err(mpu3050->dev, "error setting power mode\n"); return ret; } diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600.h b/drivers/iio/imu/inv_icm42600/inv_icm42600.h index c0f5059b13b3..995a9dc06521 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600.h +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600.h @@ -17,6 +17,7 @@ #include "inv_icm42600_buffer.h" enum inv_icm42600_chip { + INV_CHIP_INVALID, INV_CHIP_ICM42600, INV_CHIP_ICM42602, INV_CHIP_ICM42605, diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c index 8bd77185ccb7..dcbd4e928851 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c @@ -565,7 +565,7 @@ int inv_icm42600_core_probe(struct regmap *regmap, int chip, int irq, bool open_drain; int ret; - if (chip < 0 || chip >= INV_CHIP_NB) { + if (chip <= INV_CHIP_INVALID || chip >= INV_CHIP_NB) { dev_err(dev, "invalid chip = %d\n", chip); return -ENODEV; } diff --git a/drivers/iio/proximity/vl53l0x-i2c.c b/drivers/iio/proximity/vl53l0x-i2c.c index 235e125aeb3a..3d3ab86423ee 100644 --- a/drivers/iio/proximity/vl53l0x-i2c.c +++ b/drivers/iio/proximity/vl53l0x-i2c.c @@ -104,6 +104,7 @@ static int vl53l0x_read_proximity(struct vl53l0x_data *data, u16 tries = 20; u8 buffer[12]; int ret; + unsigned long time_left; ret = i2c_smbus_write_byte_data(client, VL_REG_SYSRANGE_START, 1); if (ret < 0) @@ -112,10 +113,8 @@ static int vl53l0x_read_proximity(struct vl53l0x_data *data, if (data->client->irq) { reinit_completion(&data->completion); - ret = wait_for_completion_timeout(&data->completion, HZ/10); - if (ret < 0) - return ret; - else if (ret == 0) + time_left = wait_for_completion_timeout(&data->completion, HZ/10); + if (time_left == 0) return -ETIMEDOUT; vl53l0x_clear_irq(data); diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c index e09e58072872..2277d6336ac0 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -196,6 +196,7 @@ static int iio_sysfs_trigger_remove(int id) } iio_trigger_unregister(t->trig); + irq_work_sync(&t->work); iio_trigger_free(t->trig); list_del(&t->l); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index ee568bdf3c78..3cc7a23fa69f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1280,8 +1280,10 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, return ERR_CAST(cm_id_priv); err = cm_init_listen(cm_id_priv, service_id, 0); - if (err) + if (err) { + ib_destroy_cm_id(&cm_id_priv->id); return ERR_PTR(err); + } spin_lock_irq(&cm_id_priv->lock); listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 329ee4f48d95..cfc2110fc38a 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -306,6 +306,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) unsigned long dim = from->nr_segs; int idx; + if (!HFI1_CAP_IS_KSET(SDMA)) + return -EINVAL; idx = srcu_read_lock(&fd->pq_srcu); pq = srcu_dereference(fd->pq, &fd->pq_srcu); if (!cq || !pq) { diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index fa2cd76747ff..837293aac68f 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -530,7 +530,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd) u16 shift, mult; u64 src; u32 current_egress_rate; /* Mbits /sec */ - u32 max_pkt_time; + u64 max_pkt_time; /* * max_pkt_time is the maximum packet egress time in units * of the fabric clock period 1/(805 MHz). diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 0b73dc7847aa..a044bee257f9 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1330,11 +1330,13 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) kvfree(sde->tx_ring); sde->tx_ring = NULL; } - spin_lock_irq(&dd->sde_map_lock); - sdma_map_free(rcu_access_pointer(dd->sdma_map)); - RCU_INIT_POINTER(dd->sdma_map, NULL); - spin_unlock_irq(&dd->sde_map_lock); - synchronize_rcu(); + if (rcu_access_pointer(dd->sdma_map)) { + spin_lock_irq(&dd->sde_map_lock); + sdma_map_free(rcu_access_pointer(dd->sdma_map)); + RCU_INIT_POINTER(dd->sdma_map, NULL); + spin_unlock_irq(&dd->sde_map_lock); + synchronize_rcu(); + } kfree(dd->per_sdma); dd->per_sdma = NULL; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 9dde70373a55..8ef6eecc42a0 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -418,6 +418,7 @@ struct qedr_qp { u32 sq_psn; u32 qkey; u32 dest_qp_num; + u8 timeout; /* Relevant to qps created from kernel space only (ULPs) */ u8 prev_wqe_size; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index eeb87f31cd25..f7b97b8e81a4 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2622,6 +2622,8 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1 << max_t(int, attr->timeout - 8, 0); else qp_params.ack_timeout = 0; + + qp->timeout = attr->timeout; } if (attr_mask & IB_QP_RETRY_CNT) { @@ -2781,7 +2783,7 @@ int qedr_query_qp(struct ib_qp *ibqp, rdma_ah_set_dgid_raw(&qp_attr->ah_attr, ¶ms.dgid.bytes[0]); rdma_ah_set_port_num(&qp_attr->ah_attr, 1); rdma_ah_set_sl(&qp_attr->ah_attr, 0); - qp_attr->timeout = params.timeout; + qp_attr->timeout = qp->timeout; qp_attr->rnr_retry = params.rnr_retry; qp_attr->retry_cnt = params.retry_cnt; qp_attr->min_rnr_timer = params.min_rnr_nak_timer; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d8d52a00a1be..585a9c76e518 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2826,7 +2826,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi, EXPORT_SYMBOL(rvt_qp_iter); /* - * This should be called with s_lock held. + * This should be called with s_lock and r_lock held. */ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) @@ -3185,7 +3185,9 @@ void rvt_ruc_loopback(struct rvt_qp *sqp) rvp->n_loop_pkts++; flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (local_ops) { atomic_dec(&sqp->local_ops_pending); local_ops = 0; @@ -3239,7 +3241,9 @@ void rvt_ruc_loopback(struct rvt_qp *sqp) spin_unlock_irqrestore(&qp->r_lock, flags); serr_no_r_lock: spin_lock_irqsave(&sqp->s_lock, flags); + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (sqp->ibqp.qp_type == IB_QPT_RC) { int lastwqe; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index d4917646641a..69238f856c67 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -650,7 +650,7 @@ int rxe_requester(void *arg) opcode = next_opcode(qp, wqe, wqe->wr.opcode); if (unlikely(opcode < 0)) { wqe->status = IB_WC_LOC_QP_OP_ERR; - goto exit; + goto err; } mask = rxe_opcode[opcode].mask; diff --git a/drivers/input/input.c b/drivers/input/input.c index 3cfd2c18eebd..49504dcd5dc6 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -47,6 +47,17 @@ static DEFINE_MUTEX(input_mutex); static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; +static const unsigned int input_max_code[EV_CNT] = { + [EV_KEY] = KEY_MAX, + [EV_REL] = REL_MAX, + [EV_ABS] = ABS_MAX, + [EV_MSC] = MSC_MAX, + [EV_SW] = SW_MAX, + [EV_LED] = LED_MAX, + [EV_SND] = SND_MAX, + [EV_FF] = FF_MAX, +}; + static inline int is_event_supported(unsigned int code, unsigned long *bm, unsigned int max) { @@ -1976,6 +1987,14 @@ EXPORT_SYMBOL(input_get_timestamp); */ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code) { + if (type < EV_CNT && input_max_code[type] && + code > input_max_code[type]) { + pr_err("%s: invalid code %u for type %u\n", __func__, code, + type); + dump_stack(); + return; + } + switch (type) { case EV_KEY: __set_bit(code, dev->keybit); diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index cb6ec59a045d..efffcf0ebd3b 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -85,13 +85,13 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = { }, { /* - * Lenovo Yoga Tab2 1051L, something messes with the home-button + * Lenovo Yoga Tab2 1051F/1051L, something messes with the home-button * IRQ settings, leading to a non working home-button. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "60073"), - DMI_MATCH(DMI_PRODUCT_VERSION, "1051L"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1051"), }, }, {} /* Terminating entry */ diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c index fe43e5557ed7..cdcb7737c46a 100644 --- a/drivers/input/misc/sparcspkr.c +++ b/drivers/input/misc/sparcspkr.c @@ -205,6 +205,7 @@ static int bbc_beep_probe(struct platform_device *op) info = &state->u.bbc; info->clock_freq = of_getintprop_default(dp, "clock-frequency", 0); + of_node_put(dp); if (!info->clock_freq) goto out_free; diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 59a14505b9cd..ca150618d32f 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -942,17 +942,22 @@ static int bcm5974_probe(struct usb_interface *iface, if (!dev->tp_data) goto err_free_bt_buffer; - if (dev->bt_urb) + if (dev->bt_urb) { usb_fill_int_urb(dev->bt_urb, udev, usb_rcvintpipe(udev, cfg->bt_ep), dev->bt_data, dev->cfg.bt_datalen, bcm5974_irq_button, dev, 1); + dev->bt_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + } + usb_fill_int_urb(dev->tp_urb, udev, usb_rcvintpipe(udev, cfg->tp_ep), dev->tp_data, dev->cfg.tp_datalen, bcm5974_irq_trackpad, dev, 1); + dev->tp_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + /* create bcm5974 device */ usb_make_path(udev, dev->phys, sizeof(dev->phys)); strlcat(dev->phys, "/input0", sizeof(dev->phys)); diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 30576a5f2f04..f437eefec94a 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -420,9 +420,9 @@ static int ili210x_i2c_probe(struct i2c_client *client, if (error) return error; - usleep_range(50, 100); + usleep_range(12000, 15000); gpiod_set_value_cansleep(reset_gpio, 0); - msleep(100); + msleep(160); } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c index 9a64e1dbc04a..a05a7a66b4ed 100644 --- a/drivers/input/touchscreen/stmfts.c +++ b/drivers/input/touchscreen/stmfts.c @@ -337,13 +337,15 @@ static int stmfts_input_open(struct input_dev *dev) struct stmfts_data *sdata = input_get_drvdata(dev); int err; - err = pm_runtime_get_sync(&sdata->client->dev); - if (err < 0) + err = pm_runtime_resume_and_get(&sdata->client->dev); + if (err) return err; err = i2c_smbus_write_byte(sdata->client, STMFTS_MS_MT_SENSE_ON); - if (err) + if (err) { + pm_runtime_put_sync(&sdata->client->dev); return err; + } mutex_lock(&sdata->mutex); sdata->running = true; diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 7887941730db..ceb6cdc20484 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1084,9 +1084,14 @@ static int of_count_icc_providers(struct device_node *np) { struct device_node *child; int count = 0; + const struct of_device_id __maybe_unused ignore_list[] = { + { .compatible = "qcom,sc7180-ipa-virt" }, + {} + }; for_each_available_child_of_node(np, child) { - if (of_property_read_bool(child, "#interconnect-cells")) + if (of_property_read_bool(child, "#interconnect-cells") && + likely(!of_match_node(ignore_list, child))) count++; count += of_count_icc_providers(child); } diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c index 8d9044ed18ab..597a7ee7a9bb 100644 --- a/drivers/interconnect/qcom/sc7180.c +++ b/drivers/interconnect/qcom/sc7180.c @@ -47,7 +47,6 @@ DEFINE_QNODE(qnm_mnoc_sf, SC7180_MASTER_MNOC_SF_MEM_NOC, 1, 32, SC7180_SLAVE_GEM DEFINE_QNODE(qnm_snoc_gc, SC7180_MASTER_SNOC_GC_MEM_NOC, 1, 8, SC7180_SLAVE_LLCC); DEFINE_QNODE(qnm_snoc_sf, SC7180_MASTER_SNOC_SF_MEM_NOC, 1, 16, SC7180_SLAVE_LLCC); DEFINE_QNODE(qxm_gpu, SC7180_MASTER_GFX3D, 2, 32, SC7180_SLAVE_GEM_NOC_SNOC, SC7180_SLAVE_LLCC); -DEFINE_QNODE(ipa_core_master, SC7180_MASTER_IPA_CORE, 1, 8, SC7180_SLAVE_IPA_CORE); DEFINE_QNODE(llcc_mc, SC7180_MASTER_LLCC, 2, 4, SC7180_SLAVE_EBI1); DEFINE_QNODE(qhm_mnoc_cfg, SC7180_MASTER_CNOC_MNOC_CFG, 1, 4, SC7180_SLAVE_SERVICE_MNOC); DEFINE_QNODE(qxm_camnoc_hf0, SC7180_MASTER_CAMNOC_HF0, 2, 32, SC7180_SLAVE_MNOC_HF_MEM_NOC); @@ -129,7 +128,6 @@ DEFINE_QNODE(qhs_mdsp_ms_mpu_cfg, SC7180_SLAVE_MSS_PROC_MS_MPU_CFG, 1, 4); DEFINE_QNODE(qns_gem_noc_snoc, SC7180_SLAVE_GEM_NOC_SNOC, 1, 8, SC7180_MASTER_GEM_NOC_SNOC); DEFINE_QNODE(qns_llcc, SC7180_SLAVE_LLCC, 1, 16, SC7180_MASTER_LLCC); DEFINE_QNODE(srvc_gemnoc, SC7180_SLAVE_SERVICE_GEM_NOC, 1, 4); -DEFINE_QNODE(ipa_core_slave, SC7180_SLAVE_IPA_CORE, 1, 8); DEFINE_QNODE(ebi, SC7180_SLAVE_EBI1, 2, 4); DEFINE_QNODE(qns_mem_noc_hf, SC7180_SLAVE_MNOC_HF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_HF_MEM_NOC); DEFINE_QNODE(qns_mem_noc_sf, SC7180_SLAVE_MNOC_SF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_SF_MEM_NOC); @@ -160,7 +158,6 @@ DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi); DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc); DEFINE_QBCM(bcm_mm0, "MM0", false, &qns_mem_noc_hf); DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto); -DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave); DEFINE_QBCM(bcm_cn0, "CN0", true, &qnm_snoc, &xm_qdss_dap, &qhs_a1_noc_cfg, &qhs_a2_noc_cfg, &qhs_ahb2phy0, &qhs_aop, &qhs_aoss, &qhs_boot_rom, &qhs_camera_cfg, &qhs_camera_nrt_throttle_cfg, &qhs_camera_rt_throttle_cfg, &qhs_clk_ctl, &qhs_cpr_cx, &qhs_cpr_mx, &qhs_crypto0_cfg, &qhs_dcc_cfg, &qhs_ddrss_cfg, &qhs_display_cfg, &qhs_display_rt_throttle_cfg, &qhs_display_throttle_cfg, &qhs_glm, &qhs_gpuss_cfg, &qhs_imem_cfg, &qhs_ipa, &qhs_mnoc_cfg, &qhs_mss_cfg, &qhs_npu_cfg, &qhs_npu_dma_throttle_cfg, &qhs_npu_dsp_throttle_cfg, &qhs_pimem_cfg, &qhs_prng, &qhs_qdss_cfg, &qhs_qm_cfg, &qhs_qm_mpu_cfg, &qhs_qup0, &qhs_qup1, &qhs_security, &qhs_snoc_cfg, &qhs_tcsr, &qhs_tlmm_1, &qhs_tlmm_2, &qhs_tlmm_3, &qhs_ufs_mem_cfg, &qhs_usb3, &qhs_venus_cfg, &qhs_venus_throttle_cfg, &qhs_vsense_ctrl_cfg, &srvc_cnoc); DEFINE_QBCM(bcm_mm1, "MM1", false, &qxm_camnoc_hf0_uncomp, &qxm_camnoc_hf1_uncomp, &qxm_camnoc_sf_uncomp, &qhm_mnoc_cfg, &qxm_mdp0, &qxm_rot, &qxm_venus0, &qxm_venus_arm9); DEFINE_QBCM(bcm_sh2, "SH2", false, &acm_sys_tcu); @@ -372,22 +369,6 @@ static struct qcom_icc_desc sc7180_gem_noc = { .num_bcms = ARRAY_SIZE(gem_noc_bcms), }; -static struct qcom_icc_bcm *ipa_virt_bcms[] = { - &bcm_ip0, -}; - -static struct qcom_icc_node *ipa_virt_nodes[] = { - [MASTER_IPA_CORE] = &ipa_core_master, - [SLAVE_IPA_CORE] = &ipa_core_slave, -}; - -static struct qcom_icc_desc sc7180_ipa_virt = { - .nodes = ipa_virt_nodes, - .num_nodes = ARRAY_SIZE(ipa_virt_nodes), - .bcms = ipa_virt_bcms, - .num_bcms = ARRAY_SIZE(ipa_virt_bcms), -}; - static struct qcom_icc_bcm *mc_virt_bcms[] = { &bcm_acv, &bcm_mc0, @@ -611,8 +592,6 @@ static const struct of_device_id qnoc_of_match[] = { .data = &sc7180_dc_noc}, { .compatible = "qcom,sc7180-gem-noc", .data = &sc7180_gem_noc}, - { .compatible = "qcom,sc7180-ipa-virt", - .data = &sc7180_ipa_virt}, { .compatible = "qcom,sc7180-mc-virt", .data = &sc7180_mc_virt}, { .compatible = "qcom,sc7180-mmss-noc", diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 6eaefc9e7b3d..e988f6f198c5 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -84,7 +84,7 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 -#define LOOP_TIMEOUT 100000 +#define LOOP_TIMEOUT 2000000 /* * ACPI table definitions * diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 433031389296..2b24dfabc68f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3505,6 +3505,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; if (resource_size(res) < arm_smmu_resource_size(smmu)) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 6c9a69305636..fa6caa2a12e5 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2117,11 +2117,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (err) return err; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ioaddr = res->start; - smmu->base = devm_ioremap_resource(dev, res); + smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(smmu->base)) return PTR_ERR(smmu->base); + ioaddr = res->start; /* * The resource size should effectively match the value of SMMU_TOP; * stash that temporarily until we know PAGESIZE to validate it with. diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b8d0b56a7575..70d569b80ecf 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -385,7 +385,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, static struct notifier_block dmar_pci_bus_nb = { .notifier_call = dmar_pci_bus_notifier, - .priority = INT_MIN, + .priority = 1, }; static struct dmar_drhd_unit * diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 21749859ad45..477dde39823c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -6296,7 +6296,7 @@ static void quirk_igfx_skip_te_disable(struct pci_dev *dev) ver = (dev->device >> 8) & 0xff; if (ver != 0x45 && ver != 0x46 && ver != 0x4c && ver != 0x4e && ver != 0x8a && ver != 0x98 && - ver != 0x9a) + ver != 0x9a && ver != 0xa7) return; if (risky_device(dev)) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index f0ba6a09b434..4c2b5d76e73f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -616,16 +616,19 @@ static void insert_iommu_master(struct device *dev, static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *spec) { - struct msm_iommu_dev *iommu; + struct msm_iommu_dev *iommu = NULL, *iter; unsigned long flags; int ret = 0; spin_lock_irqsave(&msm_iommu_lock, flags); - list_for_each_entry(iommu, &qcom_iommu_devices, dev_node) - if (iommu->dev->of_node == spec->np) + list_for_each_entry(iter, &qcom_iommu_devices, dev_node) { + if (iter->dev->of_node == spec->np) { + iommu = iter; break; + } + } - if (!iommu || iommu->dev->of_node != spec->np) { + if (!iommu) { ret = -ENODEV; goto fail; } diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e168a682806a..e864288a6e0e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -932,8 +932,7 @@ static int mtk_iommu_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&data->iommu); iommu_device_unregister(&data->iommu); - if (iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, NULL); + list_del(&data->list); clk_disable_unprepare(data->bclk); device_link_remove(data->smicomm_dev, &pdev->dev); diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 84f2741aaac6..c76fb70c70bb 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -308,7 +308,16 @@ static inline int armada_370_xp_msi_init(struct device_node *node, static void armada_xp_mpic_perf_init(void) { - unsigned long cpuid = cpu_logical_map(smp_processor_id()); + unsigned long cpuid; + + /* + * This Performance Counter Overflow interrupt is specific for + * Armada 370 and XP. It is not available on Armada 375, 38x and 39x. + */ + if (!of_machine_is_compatible("marvell,armada-370-xp")) + return; + + cpuid = cpu_logical_map(smp_processor_id()); /* Enable Performance Counter Overflow interrupts */ writel(ARMADA_370_XP_INT_CAUSE_PERF(cpuid), diff --git a/drivers/irqchip/irq-aspeed-i2c-ic.c b/drivers/irqchip/irq-aspeed-i2c-ic.c index 8d591c179f81..3d3210828e9b 100644 --- a/drivers/irqchip/irq-aspeed-i2c-ic.c +++ b/drivers/irqchip/irq-aspeed-i2c-ic.c @@ -79,8 +79,8 @@ static int __init aspeed_i2c_ic_of_init(struct device_node *node, } i2c_ic->parent_irq = irq_of_parse_and_map(node, 0); - if (i2c_ic->parent_irq < 0) { - ret = i2c_ic->parent_irq; + if (!i2c_ic->parent_irq) { + ret = -EINVAL; goto err_iounmap; } diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c index 0f0aac7cc114..7cb13364ecfa 100644 --- a/drivers/irqchip/irq-aspeed-scu-ic.c +++ b/drivers/irqchip/irq-aspeed-scu-ic.c @@ -159,8 +159,8 @@ static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic, } irq = irq_of_parse_and_map(node, 0); - if (irq < 0) { - rc = irq; + if (!irq) { + rc = -EINVAL; goto err; } diff --git a/drivers/irqchip/irq-gic-realview.c b/drivers/irqchip/irq-gic-realview.c index b4c1924f0255..38fab02ffe9d 100644 --- a/drivers/irqchip/irq-gic-realview.c +++ b/drivers/irqchip/irq-gic-realview.c @@ -57,6 +57,7 @@ realview_gic_of_init(struct device_node *node, struct device_node *parent) /* The PB11MPCore GIC needs to be configured in the syscon */ map = syscon_node_to_regmap(np); + of_node_put(np); if (!IS_ERR(map)) { /* new irq mode with no DCC */ regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 5bc813cfc36c..6db1dbc6a736 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1848,7 +1848,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) gic_data.ppi_descs = kcalloc(gic_data.ppi_nr, sizeof(*gic_data.ppi_descs), GFP_KERNEL); if (!gic_data.ppi_descs) - return; + goto out_put_node; nr_parts = of_get_child_count(parts_node); @@ -1889,12 +1889,15 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node) continue; cpu = of_cpu_node_to_id(cpu_node); - if (WARN_ON(cpu < 0)) + if (WARN_ON(cpu < 0)) { + of_node_put(cpu_node); continue; + } pr_cont("%pOF[%d] ", cpu_node, cpu); cpumask_set_cpu(cpu, &part->mask); + of_node_put(cpu_node); } pr_cont("}\n"); diff --git a/drivers/irqchip/irq-or1k-pic.c b/drivers/irqchip/irq-or1k-pic.c index 03d2366118dd..d5f1fabc45d7 100644 --- a/drivers/irqchip/irq-or1k-pic.c +++ b/drivers/irqchip/irq-or1k-pic.c @@ -66,7 +66,6 @@ static struct or1k_pic_dev or1k_pic_level = { .name = "or1k-PIC-level", .irq_unmask = or1k_pic_unmask, .irq_mask = or1k_pic_mask, - .irq_mask_ack = or1k_pic_mask_ack, }, .handle = handle_level_irq, .flags = IRQ_LEVEL | IRQ_NOPROBE, diff --git a/drivers/irqchip/irq-sni-exiu.c b/drivers/irqchip/irq-sni-exiu.c index abd011fcecf4..c7db617e1a2f 100644 --- a/drivers/irqchip/irq-sni-exiu.c +++ b/drivers/irqchip/irq-sni-exiu.c @@ -37,11 +37,26 @@ struct exiu_irq_data { u32 spi_base; }; -static void exiu_irq_eoi(struct irq_data *d) +static void exiu_irq_ack(struct irq_data *d) { struct exiu_irq_data *data = irq_data_get_irq_chip_data(d); writel(BIT(d->hwirq), data->base + EIREQCLR); +} + +static void exiu_irq_eoi(struct irq_data *d) +{ + struct exiu_irq_data *data = irq_data_get_irq_chip_data(d); + + /* + * Level triggered interrupts are latched and must be cleared during + * EOI or the interrupt will be jammed on. Of course if a level + * triggered interrupt is still asserted then the write will not clear + * the interrupt. + */ + if (irqd_is_level_type(d)) + writel(BIT(d->hwirq), data->base + EIREQCLR); + irq_chip_eoi_parent(d); } @@ -91,10 +106,13 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type) writel_relaxed(val, data->base + EILVL); val = readl_relaxed(data->base + EIEDG); - if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) + if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH) { val &= ~BIT(d->hwirq); - else + irq_set_handler_locked(d, handle_fasteoi_irq); + } else { val |= BIT(d->hwirq); + irq_set_handler_locked(d, handle_fasteoi_ack_irq); + } writel_relaxed(val, data->base + EIEDG); writel_relaxed(BIT(d->hwirq), data->base + EIREQCLR); @@ -104,6 +122,7 @@ static int exiu_irq_set_type(struct irq_data *d, unsigned int type) static struct irq_chip exiu_irq_chip = { .name = "EXIU", + .irq_ack = exiu_irq_ack, .irq_eoi = exiu_irq_eoi, .irq_enable = exiu_irq_enable, .irq_mask = exiu_irq_mask, diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index 27933338f7b3..8c581c985aa7 100644 --- a/drivers/irqchip/irq-xtensa-mx.c +++ b/drivers/irqchip/irq-xtensa-mx.c @@ -151,14 +151,25 @@ static struct irq_chip xtensa_mx_irq_chip = { .irq_set_affinity = xtensa_mx_irq_set_affinity, }; +static void __init xtensa_mx_init_common(struct irq_domain *root_domain) +{ + unsigned int i; + + irq_set_default_host(root_domain); + secondary_init_irq(); + + /* Initialize default IRQ routing to CPU 0 */ + for (i = 0; i < XCHAL_NUM_EXTINTERRUPTS; ++i) + set_er(1, MIROUT(i)); +} + int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent) { struct irq_domain *root_domain = irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0, &xtensa_mx_irq_domain_ops, &xtensa_mx_irq_chip); - irq_set_default_host(root_domain); - secondary_init_irq(); + xtensa_mx_init_common(root_domain); return 0; } @@ -168,8 +179,7 @@ static int __init xtensa_mx_init(struct device_node *np, struct irq_domain *root_domain = irq_domain_add_linear(np, NR_IRQS, &xtensa_mx_irq_domain_ops, &xtensa_mx_irq_chip); - irq_set_default_host(root_domain); - secondary_init_irq(); + xtensa_mx_init_common(root_domain); return 0; } IRQCHIP_DECLARE(xtensa_mx_irq_chip, "cdns,xtensa-mx", xtensa_mx_init); diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 5cdc361da37c..539a2ed4e13d 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -44,6 +44,7 @@ config ADB_IOP config ADB_CUDA bool "Support for Cuda/Egret based Macs and PowerMacs" depends on (ADB || PPC_PMAC) && !PPC_PMAC64 + select RTC_LIB help This provides support for Cuda/Egret based Macintosh and Power Macintosh systems. This includes most m68k based Macs, @@ -57,6 +58,7 @@ config ADB_CUDA config ADB_PMU bool "Support for PMU based PowerMacs and PowerBooks" depends on PPC_PMAC || MAC + select RTC_LIB help On PowerBooks, iBooks, and recent iMacs and Power Macintoshes, the PMU is an embedded microprocessor whose primary function is to @@ -67,6 +69,10 @@ config ADB_PMU this device; you should do so if your machine is one of those mentioned above. +config ADB_PMU_EVENT + def_bool y + depends on ADB_PMU && INPUT=y + config ADB_PMU_LED bool "Support for the Power/iBook front LED" depends on PPC_PMAC && ADB_PMU diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile index 49819b1b6f20..712edcb3e0b0 100644 --- a/drivers/macintosh/Makefile +++ b/drivers/macintosh/Makefile @@ -12,7 +12,8 @@ obj-$(CONFIG_MAC_EMUMOUSEBTN) += mac_hid.o obj-$(CONFIG_INPUT_ADBHID) += adbhid.o obj-$(CONFIG_ANSLCD) += ans-lcd.o -obj-$(CONFIG_ADB_PMU) += via-pmu.o via-pmu-event.o +obj-$(CONFIG_ADB_PMU) += via-pmu.o +obj-$(CONFIG_ADB_PMU_EVENT) += via-pmu-event.o obj-$(CONFIG_ADB_PMU_LED) += via-pmu-led.o obj-$(CONFIG_PMAC_BACKLIGHT) += via-pmu-backlight.o obj-$(CONFIG_ADB_CUDA) += via-cuda.o diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 73b396189039..afb0942ccc29 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -647,7 +647,7 @@ do_adb_query(struct adb_request *req) switch(req->data[1]) { case ADB_QUERY_GETDEVINFO: - if (req->nbytes < 3) + if (req->nbytes < 3 || req->data[2] >= 16) break; mutex_lock(&adb_handler_mutex); req->reply[0] = adb_handler[req->data[2]].original_address; diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 73e6ae88fafd..aae6328b2429 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -1460,7 +1460,7 @@ pmu_handle_data(unsigned char *data, int len) pmu_pass_intr(data, len); /* len == 6 is probably a bad check. But how do I * know what PMU versions send what events here? */ - if (len == 6) { + if (IS_ENABLED(CONFIG_ADB_PMU_EVENT) && len == 6) { via_pmu_event(PMU_EVT_POWER, !!(data[1]&8)); via_pmu_event(PMU_EVT_LID, data[1]&1); } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 418914373a51..b47c00dea0f2 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2006,8 +2006,7 @@ int bch_btree_check(struct cache_set *c) int i; struct bkey *k = NULL; struct btree_iter iter; - struct btree_check_state *check_state; - char name[32]; + struct btree_check_state check_state; /* check and mark root node keys */ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) @@ -2018,63 +2017,59 @@ int bch_btree_check(struct cache_set *c) if (c->root->level == 0) return 0; - check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL); - if (!check_state) - return -ENOMEM; - - check_state->c = c; - check_state->total_threads = bch_btree_chkthread_nr(); - check_state->key_idx = 0; - spin_lock_init(&check_state->idx_lock); - atomic_set(&check_state->started, 0); - atomic_set(&check_state->enough, 0); - init_waitqueue_head(&check_state->wait); + memset(&check_state, 0, sizeof(struct btree_check_state)); + check_state.c = c; + check_state.total_threads = bch_btree_chkthread_nr(); + check_state.key_idx = 0; + spin_lock_init(&check_state.idx_lock); + atomic_set(&check_state.started, 0); + atomic_set(&check_state.enough, 0); + init_waitqueue_head(&check_state.wait); + rw_lock(0, c->root, c->root->level); /* * Run multiple threads to check btree nodes in parallel, - * if check_state->enough is non-zero, it means current + * if check_state.enough is non-zero, it means current * running check threads are enough, unncessary to create * more. */ - for (i = 0; i < check_state->total_threads; i++) { - /* fetch latest check_state->enough earlier */ + for (i = 0; i < check_state.total_threads; i++) { + /* fetch latest check_state.enough earlier */ smp_mb__before_atomic(); - if (atomic_read(&check_state->enough)) + if (atomic_read(&check_state.enough)) break; - check_state->infos[i].result = 0; - check_state->infos[i].state = check_state; - snprintf(name, sizeof(name), "bch_btrchk[%u]", i); - atomic_inc(&check_state->started); + check_state.infos[i].result = 0; + check_state.infos[i].state = &check_state; - check_state->infos[i].thread = + check_state.infos[i].thread = kthread_run(bch_btree_check_thread, - &check_state->infos[i], - name); - if (IS_ERR(check_state->infos[i].thread)) { + &check_state.infos[i], + "bch_btrchk[%d]", i); + if (IS_ERR(check_state.infos[i].thread)) { pr_err("fails to run thread bch_btrchk[%d]\n", i); for (--i; i >= 0; i--) - kthread_stop(check_state->infos[i].thread); + kthread_stop(check_state.infos[i].thread); ret = -ENOMEM; goto out; } + atomic_inc(&check_state.started); } /* * Must wait for all threads to stop. */ - wait_event_interruptible(check_state->wait, - atomic_read(&check_state->started) == 0); + wait_event(check_state.wait, atomic_read(&check_state.started) == 0); - for (i = 0; i < check_state->total_threads; i++) { - if (check_state->infos[i].result) { - ret = check_state->infos[i].result; + for (i = 0; i < check_state.total_threads; i++) { + if (check_state.infos[i].result) { + ret = check_state.infos[i].result; goto out; } } out: - kfree(check_state); + rw_unlock(0, c->root); return ret; } diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 50482107134f..1b5fdbc0d83e 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -226,7 +226,7 @@ struct btree_check_info { int result; }; -#define BCH_BTR_CHKTHREAD_MAX 64 +#define BCH_BTR_CHKTHREAD_MAX 12 struct btree_check_state { struct cache_set *c; int total_threads; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index c6613e817333..aea4833f67fc 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -407,6 +407,11 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) return ret; } +void bch_journal_space_reserve(struct journal *j) +{ + j->do_reserve = true; +} + /* Journalling */ static void btree_flush_write(struct cache_set *c) @@ -625,12 +630,30 @@ static void do_journal_discard(struct cache *ca) } } +static unsigned int free_journal_buckets(struct cache_set *c) +{ + struct journal *j = &c->journal; + struct cache *ca = c->cache; + struct journal_device *ja = &c->cache->journal; + unsigned int n; + + /* In case njournal_buckets is not power of 2 */ + if (ja->cur_idx >= ja->discard_idx) + n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx; + else + n = ja->discard_idx - ja->cur_idx; + + if (n > (1 + j->do_reserve)) + return n - (1 + j->do_reserve); + + return 0; +} + static void journal_reclaim(struct cache_set *c) { struct bkey *k = &c->journal.key; struct cache *ca = c->cache; uint64_t last_seq; - unsigned int next; struct journal_device *ja = &ca->journal; atomic_t p __maybe_unused; @@ -653,12 +676,10 @@ static void journal_reclaim(struct cache_set *c) if (c->journal.blocks_free) goto out; - next = (ja->cur_idx + 1) % ca->sb.njournal_buckets; - /* No space available on this device */ - if (next == ja->discard_idx) + if (!free_journal_buckets(c)) goto out; - ja->cur_idx = next; + ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets; k->ptr[0] = MAKE_PTR(0, bucket_to_sector(c, ca->sb.d[ja->cur_idx]), ca->sb.nr_this_dev); diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h index f2ea34d5f431..cd316b4a1e95 100644 --- a/drivers/md/bcache/journal.h +++ b/drivers/md/bcache/journal.h @@ -105,6 +105,7 @@ struct journal { spinlock_t lock; spinlock_t flush_write_lock; bool btree_flushing; + bool do_reserve; /* used when waiting because the journal was full */ struct closure_waitlist wait; struct closure io; @@ -182,5 +183,6 @@ int bch_journal_replay(struct cache_set *c, struct list_head *list); void bch_journal_free(struct cache_set *c); int bch_journal_alloc(struct cache_set *c); +void bch_journal_space_reserve(struct journal *j); #endif /* _BCACHE_JOURNAL_H */ diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 214326383145..97895262fc54 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1109,6 +1109,12 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) * which would call closure_get(&dc->disk.cl) */ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO); + if (!ddip) { + bio->bi_status = BLK_STS_RESOURCE; + bio->bi_end_io(bio); + return; + } + ddip->d = d; /* Count on the bcache device */ ddip->start_time = part_start_io_acct(d->disk, &ddip->part, bio); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 81f1cc5b3499..7f5ea2509643 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2150,6 +2150,7 @@ static int run_cache_set(struct cache_set *c) flash_devs_run(c); + bch_journal_space_reserve(&c->journal); set_bit(CACHE_SET_RUNNING, &c->flags); return 0; err: diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 952253f24175..a878b959fbcd 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -756,13 +756,11 @@ static int bch_writeback_thread(void *arg) /* Init */ #define INIT_KEYS_EACH_TIME 500000 -#define INIT_KEYS_SLEEP_MS 100 struct sectors_dirty_init { struct btree_op op; unsigned int inode; size_t count; - struct bkey start; }; static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, @@ -778,11 +776,8 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b, KEY_START(k), KEY_SIZE(k)); op->count++; - if (atomic_read(&b->c->search_inflight) && - !(op->count % INIT_KEYS_EACH_TIME)) { - bkey_copy_key(&op->start, k); - return -EAGAIN; - } + if (!(op->count % INIT_KEYS_EACH_TIME)) + cond_resched(); return MAP_CONTINUE; } @@ -797,24 +792,16 @@ static int bch_root_node_dirty_init(struct cache_set *c, bch_btree_op_init(&op.op, -1); op.inode = d->id; op.count = 0; - op.start = KEY(op.inode, 0, 0); - do { - ret = bcache_btree(map_keys_recurse, - k, - c->root, - &op.op, - &op.start, - sectors_dirty_init_fn, - 0); - if (ret == -EAGAIN) - schedule_timeout_interruptible( - msecs_to_jiffies(INIT_KEYS_SLEEP_MS)); - else if (ret < 0) { - pr_warn("sectors dirty init failed, ret=%d!\n", ret); - break; - } - } while (ret == -EAGAIN); + ret = bcache_btree(map_keys_recurse, + k, + c->root, + &op.op, + &KEY(op.inode, 0, 0), + sectors_dirty_init_fn, + 0); + if (ret < 0) + pr_warn("sectors dirty init failed, ret=%d!\n", ret); return ret; } @@ -858,7 +845,6 @@ static int bch_dirty_init_thread(void *arg) goto out; } skip_nr--; - cond_resched(); } if (p) { @@ -868,7 +854,6 @@ static int bch_dirty_init_thread(void *arg) p = NULL; prev_idx = cur_idx; - cond_resched(); } out: @@ -899,67 +884,56 @@ void bch_sectors_dirty_init(struct bcache_device *d) struct btree_iter iter; struct sectors_dirty_init op; struct cache_set *c = d->c; - struct bch_dirty_init_state *state; - char name[32]; + struct bch_dirty_init_state state; /* Just count root keys if no leaf node */ + rw_lock(0, c->root, c->root->level); if (c->root->level == 0) { bch_btree_op_init(&op.op, -1); op.inode = d->id; op.count = 0; - op.start = KEY(op.inode, 0, 0); for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) sectors_dirty_init_fn(&op.op, c->root, k); + + rw_unlock(0, c->root); return; } - state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL); - if (!state) { - pr_warn("sectors dirty init failed: cannot allocate memory\n"); - return; - } + memset(&state, 0, sizeof(struct bch_dirty_init_state)); + state.c = c; + state.d = d; + state.total_threads = bch_btre_dirty_init_thread_nr(); + state.key_idx = 0; + spin_lock_init(&state.idx_lock); + atomic_set(&state.started, 0); + atomic_set(&state.enough, 0); + init_waitqueue_head(&state.wait); - state->c = c; - state->d = d; - state->total_threads = bch_btre_dirty_init_thread_nr(); - state->key_idx = 0; - spin_lock_init(&state->idx_lock); - atomic_set(&state->started, 0); - atomic_set(&state->enough, 0); - init_waitqueue_head(&state->wait); - - for (i = 0; i < state->total_threads; i++) { - /* Fetch latest state->enough earlier */ + for (i = 0; i < state.total_threads; i++) { + /* Fetch latest state.enough earlier */ smp_mb__before_atomic(); - if (atomic_read(&state->enough)) + if (atomic_read(&state.enough)) break; - state->infos[i].state = state; - atomic_inc(&state->started); - snprintf(name, sizeof(name), "bch_dirty_init[%d]", i); - - state->infos[i].thread = - kthread_run(bch_dirty_init_thread, - &state->infos[i], - name); - if (IS_ERR(state->infos[i].thread)) { + state.infos[i].state = &state; + state.infos[i].thread = + kthread_run(bch_dirty_init_thread, &state.infos[i], + "bch_dirtcnt[%d]", i); + if (IS_ERR(state.infos[i].thread)) { pr_err("fails to run thread bch_dirty_init[%d]\n", i); for (--i; i >= 0; i--) - kthread_stop(state->infos[i].thread); + kthread_stop(state.infos[i].thread); goto out; } + atomic_inc(&state.started); } - /* - * Must wait for all threads to stop. - */ - wait_event_interruptible(state->wait, - atomic_read(&state->started) == 0); - out: - kfree(state); + /* Must wait for all threads to stop. */ + wait_event(state.wait, atomic_read(&state.started) == 0); + rw_unlock(0, c->root); } void bch_cached_dev_writeback_init(struct cached_dev *dc) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 3f1230e22de0..0f1d96920630 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -16,7 +16,7 @@ #define BCH_AUTO_GC_DIRTY_THRESHOLD 50 -#define BCH_DIRTY_INIT_THRD_MAX 64 +#define BCH_DIRTY_INIT_THRD_MAX 12 /* * 14 (16384ths) is chosen here as something that each backing device * should be a reasonable fraction of the share, and not to blow up diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b9677f701b6a..3d975db86434 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3404,6 +3404,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } +static char hex2asc(unsigned char c) +{ + return c + '0' + ((unsigned)(9 - c) >> 4 & 0x27); +} + static void crypt_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { @@ -3422,9 +3427,12 @@ static void crypt_status(struct dm_target *ti, status_type_t type, if (cc->key_size > 0) { if (cc->key_string) DMEMIT(":%u:%s", cc->key_size, cc->key_string); - else - for (i = 0; i < cc->key_size; i++) - DMEMIT("%02x", cc->key[i]); + else { + for (i = 0; i < cc->key_size; i++) { + DMEMIT("%c%c", hex2asc(cc->key[i] >> 4), + hex2asc(cc->key[i] & 0xf)); + } + } } else DMEMIT("-"); diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index d9ac7372108c..96bad057bea2 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1396,7 +1396,7 @@ static void start_worker(struct era *era) static void stop_worker(struct era *era) { atomic_set(&era->suspended, 1); - flush_workqueue(era->wq); + drain_workqueue(era->wq); } /*---------------------------------------------------------------- @@ -1566,6 +1566,12 @@ static void era_postsuspend(struct dm_target *ti) } stop_worker(era); + + r = metadata_commit(era->md); + if (r) { + DMERR("%s: metadata_commit failed", __func__); + /* FIXME: fail mode */ + } } static int era_preresume(struct dm_target *ti) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 6f085e96c3f3..835b1f3464d0 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4327,8 +4327,6 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (should_write_sb) { - int r; - init_journal(ic, 0, ic->journal_sections, 0); r = dm_integrity_failed(ic); if (unlikely(r)) { diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 33e71ea6cc14..fe3a9473f338 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -415,8 +415,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, /* * Work out how many "unsigned long"s we need to hold the bitset. */ - bitset_size = dm_round_up(region_count, - sizeof(*lc->clean_bits) << BYTE_SHIFT); + bitset_size = dm_round_up(region_count, BITS_PER_LONG); bitset_size >>= BYTE_SHIFT; lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits); @@ -616,7 +615,7 @@ static int disk_resume(struct dm_dirty_log *log) log_clear_bit(lc, lc->clean_bits, i); /* clear any old bits -- device has shrunk */ - for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++) + for (i = lc->region_count; i % BITS_PER_LONG; i++) log_clear_bit(lc, lc->clean_bits, i); /* copy clean across to sync */ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index f5083b4a0195..4e94200e0142 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1002,12 +1002,13 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) static int validate_raid_redundancy(struct raid_set *rs) { unsigned int i, rebuild_cnt = 0; - unsigned int rebuilds_per_group = 0, copies; + unsigned int rebuilds_per_group = 0, copies, raid_disks; unsigned int group_size, last_group_start; - for (i = 0; i < rs->md.raid_disks; i++) - if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || - !rs->dev[i].rdev.sb_page) + for (i = 0; i < rs->raid_disks; i++) + if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) && + ((!test_bit(In_sync, &rs->dev[i].rdev.flags) || + !rs->dev[i].rdev.sb_page))) rebuild_cnt++; switch (rs->md.level) { @@ -1047,8 +1048,9 @@ static int validate_raid_redundancy(struct raid_set *rs) * A A B B C * C D D E E */ + raid_disks = min(rs->raid_disks, rs->md.raid_disks); if (__is_raid10_near(rs->md.new_layout)) { - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1071,10 +1073,10 @@ static int validate_raid_redundancy(struct raid_set *rs) * results in the need to treat the last (potentially larger) * set differently. */ - group_size = (rs->md.raid_disks / copies); - last_group_start = (rs->md.raid_disks / group_size) - 1; + group_size = (raid_disks / copies); + last_group_start = (raid_disks / group_size) - 1; last_group_start *= group_size; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies) && !(i > last_group_start)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1589,7 +1591,7 @@ static sector_t __rdev_sectors(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { struct md_rdev *rdev = &rs->dev[i].rdev; if (!test_bit(Journal, &rdev->flags) && @@ -3732,13 +3734,13 @@ static int raid_iterate_devices(struct dm_target *ti, unsigned int i; int r = 0; - for (i = 0; !r && i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev) - r = fn(ti, - rs->dev[i].data_dev, - 0, /* No offset on data devs */ - rs->md.dev_sectors, - data); + for (i = 0; !r && i < rs->raid_disks; i++) { + if (rs->dev[i].data_dev) { + r = fn(ti, rs->dev[i].data_dev, + 0, /* No offset on data devs */ + rs->md.dev_sectors, data); + } + } return r; } diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 35d368c418d0..55443a6598fa 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -224,6 +224,7 @@ void dm_stats_cleanup(struct dm_stats *stats) atomic_read(&shared->in_flight[READ]), atomic_read(&shared->in_flight[WRITE])); } + cond_resched(); } dm_stat_free(&s->rcu_head); } @@ -313,6 +314,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, for (ni = 0; ni < n_entries; ni++) { atomic_set(&s->stat_shared[ni].in_flight[READ], 0); atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0); + cond_resched(); } if (s->n_histogram_entries) { @@ -325,6 +327,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, for (ni = 0; ni < n_entries; ni++) { s->stat_shared[ni].tmp.histogram = hi; hi += s->n_histogram_entries + 1; + cond_resched(); } } @@ -345,6 +348,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, for (ni = 0; ni < n_entries; ni++) { p[ni].histogram = hi; hi += s->n_histogram_entries + 1; + cond_resched(); } } } @@ -474,6 +478,7 @@ static int dm_stats_list(struct dm_stats *stats, const char *program, } DMEMIT("\n"); } + cond_resched(); } mutex_unlock(&stats->mutex); @@ -750,6 +755,7 @@ static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end, local_irq_enable(); } } + cond_resched(); } } @@ -865,6 +871,8 @@ static int dm_stats_print(struct dm_stats *stats, int id, if (unlikely(sz + 1 >= maxlen)) goto buffer_overflow; + + cond_resched(); } if (clear) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 2c355b58d078..1c8038c665ec 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1251,6 +1251,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) static struct target_type verity_target = { .name = "verity", + .features = DM_TARGET_IMMUTABLE, .version = {1, 7, 0}, .features = DM_TARGET_IMMUTABLE, .module = THIS_MODULE, diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index ea3130e11680..d377ea060925 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -639,14 +639,6 @@ static int md_bitmap_read_sb(struct bitmap *bitmap) daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); sectors_reserved = le32_to_cpu(sb->sectors_reserved); - /* Setup nodes/clustername only if bitmap version is - * cluster-compatible - */ - if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { - nodes = le32_to_cpu(sb->nodes); - strlcpy(bitmap->mddev->bitmap_info.cluster_name, - sb->cluster_name, 64); - } /* verify that the bitmap-specific fields are valid */ if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) @@ -668,6 +660,16 @@ static int md_bitmap_read_sb(struct bitmap *bitmap) goto out; } + /* + * Setup nodes/clustername only if bitmap version is + * cluster-compatible + */ + if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { + nodes = le32_to_cpu(sb->nodes); + strlcpy(bitmap->mddev->bitmap_info.cluster_name, + sb->cluster_name, 64); + } + /* keep the array size field of the bitmap superblock up to date */ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); @@ -700,9 +702,9 @@ static int md_bitmap_read_sb(struct bitmap *bitmap) out: kunmap_atomic(sb); - /* Assigning chunksize is required for "re_read" */ - bitmap->mddev->bitmap_info.chunksize = chunksize; if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { + /* Assigning chunksize is required for "re_read" */ + bitmap->mddev->bitmap_info.chunksize = chunksize; err = md_setup_cluster(bitmap->mddev, nodes); if (err) { pr_warn("%s: Could not setup cluster service (%d)\n", @@ -713,18 +715,18 @@ static int md_bitmap_read_sb(struct bitmap *bitmap) goto re_read; } - out_no_sb: - if (test_bit(BITMAP_STALE, &bitmap->flags)) - bitmap->events_cleared = bitmap->mddev->events; - bitmap->mddev->bitmap_info.chunksize = chunksize; - bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; - bitmap->mddev->bitmap_info.max_write_behind = write_behind; - bitmap->mddev->bitmap_info.nodes = nodes; - if (bitmap->mddev->bitmap_info.space == 0 || - bitmap->mddev->bitmap_info.space > sectors_reserved) - bitmap->mddev->bitmap_info.space = sectors_reserved; - if (err) { + if (err == 0) { + if (test_bit(BITMAP_STALE, &bitmap->flags)) + bitmap->events_cleared = bitmap->mddev->events; + bitmap->mddev->bitmap_info.chunksize = chunksize; + bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; + bitmap->mddev->bitmap_info.max_write_behind = write_behind; + bitmap->mddev->bitmap_info.nodes = nodes; + if (bitmap->mddev->bitmap_info.space == 0 || + bitmap->mddev->bitmap_info.space > sectors_reserved) + bitmap->mddev->bitmap_info.space = sectors_reserved; + } else { md_bitmap_print_sb(bitmap); if (bitmap->cluster_slot < 0) md_cluster_stop(bitmap->mddev); diff --git a/drivers/md/md.c b/drivers/md/md.c index cc3876500c4b..5bd1edbb415b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2648,14 +2648,16 @@ static void sync_sbs(struct mddev *mddev, int nospares) static bool does_sb_need_changing(struct mddev *mddev) { - struct md_rdev *rdev; + struct md_rdev *rdev = NULL, *iter; struct mdp_superblock_1 *sb; int role; /* Find a good rdev */ - rdev_for_each(rdev, mddev) - if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags)) + rdev_for_each(iter, mddev) + if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) { + rdev = iter; break; + } /* No good device found. */ if (!rdev) @@ -7968,17 +7970,22 @@ EXPORT_SYMBOL(md_register_thread); void md_unregister_thread(struct md_thread **threadp) { - struct md_thread *thread = *threadp; - if (!thread) - return; - pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); - /* Locking ensures that mddev_unlock does not wake_up a + struct md_thread *thread; + + /* + * Locking ensures that mddev_unlock does not wake_up a * non-existent thread */ spin_lock(&pers_lock); + thread = *threadp; + if (!thread) { + spin_unlock(&pers_lock); + return; + } *threadp = NULL; spin_unlock(&pers_lock); + pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); kthread_stop(thread->tsk); kfree(thread); } @@ -9728,16 +9735,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev) void md_reload_sb(struct mddev *mddev, int nr) { - struct md_rdev *rdev; + struct md_rdev *rdev = NULL, *iter; int err; /* Find the rdev */ - rdev_for_each_rcu(rdev, mddev) { - if (rdev->desc_nr == nr) + rdev_for_each_rcu(iter, mddev) { + if (iter->desc_nr == nr) { + rdev = iter; break; + } } - if (!rdev || rdev->desc_nr != nr) { + if (!rdev) { pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr); return; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 35843df15b5e..a4c0cafa6010 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) pr_debug("md/raid0:%s: FINAL %d zones\n", mdname(mddev), conf->nr_strip_zones); - if (conf->nr_strip_zones == 1) { - conf->layout = RAID0_ORIG_LAYOUT; - } else if (mddev->layout == RAID0_ORIG_LAYOUT || - mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) { - conf->layout = mddev->layout; - } else if (default_layout == RAID0_ORIG_LAYOUT || - default_layout == RAID0_ALT_MULTIZONE_LAYOUT) { - conf->layout = default_layout; - } else { - pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", - mdname(mddev)); - pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); - err = -ENOTSUPP; - goto abort; - } /* * now since we have the hard sector sizes, we can make sure * chunk size is a multiple of that sector size @@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) (unsigned long long)smallest->sectors); } + if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) { + conf->layout = RAID0_ORIG_LAYOUT; + } else if (mddev->layout == RAID0_ORIG_LAYOUT || + mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) { + conf->layout = mddev->layout; + } else if (default_layout == RAID0_ORIG_LAYOUT || + default_layout == RAID0_ALT_MULTIZONE_LAYOUT) { + conf->layout = default_layout; + } else { + pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", + mdname(mddev)); + pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); + err = -EOPNOTSUPP; + goto abort; + } + pr_debug("md/raid0:%s: done.\n", mdname(mddev)); *private_conf = conf; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 39343479ac2a..26b35125d3c3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -686,17 +686,17 @@ int raid5_calc_degraded(struct r5conf *conf) return degraded; } -static int has_failed(struct r5conf *conf) +static bool has_failed(struct r5conf *conf) { - int degraded; + int degraded = conf->mddev->degraded; - if (conf->mddev->reshape_position == MaxSector) - return conf->mddev->degraded > conf->max_degraded; + if (test_bit(MD_BROKEN, &conf->mddev->flags)) + return true; - degraded = raid5_calc_degraded(conf); - if (degraded > conf->max_degraded) - return 1; - return 0; + if (conf->mddev->reshape_position != MaxSector) + degraded = raid5_calc_degraded(conf); + + return degraded > conf->max_degraded; } struct stripe_head * @@ -2876,34 +2876,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) unsigned long flags; pr_debug("raid456: error called\n"); + pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n", + mdname(mddev), bdevname(rdev->bdev, b)); + spin_lock_irqsave(&conf->device_lock, flags); - - if (test_bit(In_sync, &rdev->flags) && - mddev->degraded == conf->max_degraded) { - /* - * Don't allow to achieve failed state - * Don't try to recover this device - */ - conf->recovery_disabled = mddev->recovery_disabled; - spin_unlock_irqrestore(&conf->device_lock, flags); - return; - } - set_bit(Faulty, &rdev->flags); clear_bit(In_sync, &rdev->flags); mddev->degraded = raid5_calc_degraded(conf); + + if (has_failed(conf)) { + set_bit(MD_BROKEN, &conf->mddev->flags); + conf->recovery_disabled = mddev->recovery_disabled; + + pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n", + mdname(mddev), mddev->degraded, conf->raid_disks); + } else { + pr_crit("md/raid:%s: Operation continuing on %d devices.\n", + mdname(mddev), conf->raid_disks - mddev->degraded); + } + spin_unlock_irqrestore(&conf->device_lock, flags); set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(Blocked, &rdev->flags); set_mask_bits(&mddev->sb_flags, 0, BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); - pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n" - "md/raid:%s: Operation continuing on %d devices.\n", - mdname(mddev), - bdevname(rdev->bdev, b), - mdname(mddev), - conf->raid_disks - mddev->degraded); r5c_update_on_rdev_error(mddev, rdev); } @@ -8006,6 +8003,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) */ if (rdev->saved_raid_disk >= 0 && rdev->saved_raid_disk >= first && + rdev->saved_raid_disk <= last && conf->disks[rdev->saved_raid_disk].rdev == NULL) first = rdev->saved_raid_disk; diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index 2e5698fbc3a8..e23aa608f66f 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -1271,7 +1271,7 @@ static int cec_config_log_addr(struct cec_adapter *adap, * While trying to poll the physical address was reset * and the adapter was unconfigured, so bail out. */ - if (!adap->is_configuring) + if (adap->phys_addr == CEC_PHYS_ADDR_INVALID) return -EINTR; if (err) @@ -1328,7 +1328,6 @@ static void cec_adap_unconfigure(struct cec_adapter *adap) adap->phys_addr != CEC_PHYS_ADDR_INVALID) WARN_ON(adap->ops->adap_log_addr(adap, CEC_LOG_ADDR_INVALID)); adap->log_addrs.log_addr_mask = 0; - adap->is_configuring = false; adap->is_configured = false; cec_flush(adap); wake_up_interruptible(&adap->kthread_waitq); @@ -1520,9 +1519,10 @@ static int cec_config_thread_func(void *arg) for (i = 0; i < las->num_log_addrs; i++) las->log_addr[i] = CEC_LOG_ADDR_INVALID; cec_adap_unconfigure(adap); + adap->is_configuring = false; adap->kthread_config = NULL; - mutex_unlock(&adap->lock); complete(&adap->config_completion); + mutex_unlock(&adap->lock); return 0; } diff --git a/drivers/media/i2c/ov7670.c b/drivers/media/i2c/ov7670.c index b42b289faaef..154776d0069e 100644 --- a/drivers/media/i2c/ov7670.c +++ b/drivers/media/i2c/ov7670.c @@ -2000,7 +2000,6 @@ static int ov7670_remove(struct i2c_client *client) v4l2_async_unregister_subdev(sd); v4l2_ctrl_handler_free(&info->hdl); media_entity_cleanup(&info->sd.entity); - ov7670_power_off(sd); return 0; } diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c index 4e8132d4b2df..a23c025595a0 100644 --- a/drivers/media/pci/cx23885/cx23885-core.c +++ b/drivers/media/pci/cx23885/cx23885-core.c @@ -2154,7 +2154,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev, err = pci_set_dma_mask(pci_dev, 0xffffffff); if (err) { pr_err("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name); - goto fail_ctrl; + goto fail_dma_set_mask; } err = request_irq(pci_dev->irq, cx23885_irq, @@ -2162,7 +2162,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev, if (err < 0) { pr_err("%s: can't get IRQ %d\n", dev->name, pci_dev->irq); - goto fail_irq; + goto fail_dma_set_mask; } switch (dev->board) { @@ -2184,7 +2184,7 @@ static int cx23885_initdev(struct pci_dev *pci_dev, return 0; -fail_irq: +fail_dma_set_mask: cx23885_dev_unregister(dev); fail_ctrl: v4l2_ctrl_handler_free(hdl); diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c index 285047b32c44..a3d45287a534 100644 --- a/drivers/media/pci/cx25821/cx25821-core.c +++ b/drivers/media/pci/cx25821/cx25821-core.c @@ -1340,11 +1340,11 @@ static void cx25821_finidev(struct pci_dev *pci_dev) struct cx25821_dev *dev = get_cx25821(v4l2_dev); cx25821_shutdown(dev); - pci_disable_device(pci_dev); /* unregister stuff */ if (pci_dev->irq) free_irq(pci_dev->irq, dev); + pci_disable_device(pci_dev); cx25821_dev_unregister(dev); v4l2_device_unregister(v4l2_dev); diff --git a/drivers/media/platform/aspeed-video.c b/drivers/media/platform/aspeed-video.c index 757a58829a51..9d9124308f6a 100644 --- a/drivers/media/platform/aspeed-video.c +++ b/drivers/media/platform/aspeed-video.c @@ -1723,6 +1723,7 @@ static int aspeed_video_probe(struct platform_device *pdev) rc = aspeed_video_setup_video(video); if (rc) { + aspeed_video_free_buf(video, &video->jpeg); clk_unprepare(video->vclk); clk_unprepare(video->eclk); return rc; @@ -1748,8 +1749,7 @@ static int aspeed_video_remove(struct platform_device *pdev) v4l2_device_unregister(v4l2_dev); - dma_free_coherent(video->dev, VE_JPEG_HEADER_SIZE, video->jpeg.virt, - video->jpeg.dma); + aspeed_video_free_buf(video, &video->jpeg); of_reserved_mem_device_release(dev); diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 2333079a83c7..14d4830d5db5 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -1318,7 +1318,8 @@ static int coda_enum_frameintervals(struct file *file, void *fh, struct v4l2_frmivalenum *f) { struct coda_ctx *ctx = fh_to_ctx(fh); - int i; + struct coda_q_data *q_data; + const struct coda_codec *codec; if (f->index) return -EINVAL; @@ -1327,12 +1328,19 @@ static int coda_enum_frameintervals(struct file *file, void *fh, if (!ctx->vdoa && f->pixel_format == V4L2_PIX_FMT_YUYV) return -EINVAL; - for (i = 0; i < CODA_MAX_FORMATS; i++) { - if (f->pixel_format == ctx->cvd->src_formats[i] || - f->pixel_format == ctx->cvd->dst_formats[i]) - break; + if (coda_format_normalize_yuv(f->pixel_format) == V4L2_PIX_FMT_YUV420) { + q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + codec = coda_find_codec(ctx->dev, f->pixel_format, + q_data->fourcc); + } else { + codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420, + f->pixel_format); } - if (i == CODA_MAX_FORMATS) + if (!codec) + return -EINVAL; + + if (f->width < MIN_W || f->width > codec->max_w || + f->height < MIN_H || f->height > codec->max_h) return -EINVAL; f->type = V4L2_FRMIVAL_TYPE_CONTINUOUS; @@ -2335,8 +2343,8 @@ static void coda_encode_ctrls(struct coda_ctx *ctx) V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET, -12, 12, 1, 0); v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE, - V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE, 0x0, - V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE); + V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE, 0x0, + V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE); if (ctx->dev->devtype->product == CODA_HX4 || ctx->dev->devtype->product == CODA_7541) { v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, @@ -2350,12 +2358,15 @@ static void coda_encode_ctrls(struct coda_ctx *ctx) if (ctx->dev->devtype->product == CODA_960) { v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL, - V4L2_MPEG_VIDEO_H264_LEVEL_4_0, - ~((1 << V4L2_MPEG_VIDEO_H264_LEVEL_2_0) | + V4L2_MPEG_VIDEO_H264_LEVEL_4_2, + ~((1 << V4L2_MPEG_VIDEO_H264_LEVEL_1_0) | + (1 << V4L2_MPEG_VIDEO_H264_LEVEL_2_0) | (1 << V4L2_MPEG_VIDEO_H264_LEVEL_3_0) | (1 << V4L2_MPEG_VIDEO_H264_LEVEL_3_1) | (1 << V4L2_MPEG_VIDEO_H264_LEVEL_3_2) | - (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_0)), + (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_0) | + (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_1) | + (1 << V4L2_MPEG_VIDEO_H264_LEVEL_4_2)), V4L2_MPEG_VIDEO_H264_LEVEL_4_0); } v4l2_ctrl_new_std(&ctx->ctrls, &coda_ctrl_ops, @@ -2417,7 +2428,7 @@ static void coda_decode_ctrls(struct coda_ctx *ctx) ctx->h264_profile_ctrl = v4l2_ctrl_new_std_menu(&ctx->ctrls, &coda_ctrl_ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, - ~((1 << V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE) | + ~((1 << V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE) | (1 << V4L2_MPEG_VIDEO_H264_PROFILE_MAIN) | (1 << V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)), V4L2_MPEG_VIDEO_H264_PROFILE_HIGH); diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index d26fa5967d82..dc2a144cd29b 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c +++ b/drivers/media/platform/exynos4-is/fimc-is.c @@ -140,7 +140,7 @@ static int fimc_is_enable_clocks(struct fimc_is *is) dev_err(&is->pdev->dev, "clock %s enable failed\n", fimc_is_clocks[i]); for (--i; i >= 0; i--) - clk_disable(is->clocks[i]); + clk_disable_unprepare(is->clocks[i]); return ret; } pr_debug("enabled clock: %s\n", fimc_is_clocks[i]); @@ -830,7 +830,7 @@ static int fimc_is_probe(struct platform_device *pdev) ret = pm_runtime_resume_and_get(dev); if (ret < 0) - goto err_irq; + goto err_pm_disable; vb2_dma_contig_set_max_seg_size(dev, DMA_BIT_MASK(32)); @@ -864,6 +864,8 @@ static int fimc_is_probe(struct platform_device *pdev) pm_runtime_put_noidle(dev); if (!pm_runtime_enabled(dev)) fimc_is_runtime_suspend(dev); +err_pm_disable: + pm_runtime_disable(dev); err_irq: free_irq(is->irq, is); err_clk: diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.h b/drivers/media/platform/exynos4-is/fimc-isp-video.h index edcb3a5e3cb9..2dd4ddbc748a 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.h +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.h @@ -32,7 +32,7 @@ static inline int fimc_isp_video_device_register(struct fimc_isp *isp, return 0; } -void fimc_isp_video_device_unregister(struct fimc_isp *isp, +static inline void fimc_isp_video_device_unregister(struct fimc_isp *isp, enum v4l2_buf_type type) { } diff --git a/drivers/media/platform/qcom/venus/hfi.c b/drivers/media/platform/qcom/venus/hfi.c index a59022adb14c..966b4d9b57a9 100644 --- a/drivers/media/platform/qcom/venus/hfi.c +++ b/drivers/media/platform/qcom/venus/hfi.c @@ -104,6 +104,9 @@ int hfi_core_deinit(struct venus_core *core, bool blocking) mutex_lock(&core->lock); } + if (!core->ops) + goto unlock; + ret = core->ops->core_deinit(core); if (!ret) diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c index d99ea8973b67..e3246344fb72 100644 --- a/drivers/media/platform/rockchip/rga/rga.c +++ b/drivers/media/platform/rockchip/rga/rga.c @@ -868,7 +868,7 @@ static int rga_probe(struct platform_device *pdev) ret = pm_runtime_resume_and_get(rga->dev); if (ret < 0) - goto rel_vdev; + goto rel_m2m; rga->version.major = (rga_read(rga, RGA_VERSION_INFO) >> 24) & 0xFF; rga->version.minor = (rga_read(rga, RGA_VERSION_INFO) >> 20) & 0x0F; @@ -884,7 +884,7 @@ static int rga_probe(struct platform_device *pdev) DMA_ATTR_WRITE_COMBINE); if (!rga->cmdbuf_virt) { ret = -ENOMEM; - goto rel_vdev; + goto rel_m2m; } rga->src_mmu_pages = @@ -921,6 +921,8 @@ static int rga_probe(struct platform_device *pdev) free_dma: dma_free_attrs(rga->dev, RGA_CMDBUF_SIZE, rga->cmdbuf_virt, rga->cmdbuf_phy, DMA_ATTR_WRITE_COMBINE); +rel_m2m: + v4l2_m2m_release(rga->m2m_dev); rel_vdev: video_device_release(vfd); unreg_v4l2_dev: diff --git a/drivers/media/platform/sti/delta/delta-v4l2.c b/drivers/media/platform/sti/delta/delta-v4l2.c index c691b3d81549..5da49a0ab70b 100644 --- a/drivers/media/platform/sti/delta/delta-v4l2.c +++ b/drivers/media/platform/sti/delta/delta-v4l2.c @@ -1862,7 +1862,7 @@ static int delta_probe(struct platform_device *pdev) if (ret) { dev_err(delta->dev, "%s failed to initialize firmware ipc channel\n", DELTA_PREFIX); - goto err; + goto err_pm_disable; } /* register all available decoders */ @@ -1876,7 +1876,7 @@ static int delta_probe(struct platform_device *pdev) if (ret) { dev_err(delta->dev, "%s failed to register V4L2 device\n", DELTA_PREFIX); - goto err; + goto err_pm_disable; } delta->work_queue = create_workqueue(DELTA_NAME); @@ -1901,6 +1901,8 @@ static int delta_probe(struct platform_device *pdev) destroy_workqueue(delta->work_queue); err_v4l2: v4l2_device_unregister(&delta->v4l2_dev); +err_pm_disable: + pm_runtime_disable(dev); err: return ret; } diff --git a/drivers/media/platform/vsp1/vsp1_rpf.c b/drivers/media/platform/vsp1/vsp1_rpf.c index 85587c1b6a37..75083cb234fe 100644 --- a/drivers/media/platform/vsp1/vsp1_rpf.c +++ b/drivers/media/platform/vsp1/vsp1_rpf.c @@ -291,11 +291,11 @@ static void rpf_configure_partition(struct vsp1_entity *entity, + crop.left * fmtinfo->bpp[0] / 8; if (format->num_planes > 1) { + unsigned int bpl = format->plane_fmt[1].bytesperline; unsigned int offset; - offset = crop.top * format->plane_fmt[1].bytesperline - + crop.left / fmtinfo->hsub - * fmtinfo->bpp[1] / 8; + offset = crop.top / fmtinfo->vsub * bpl + + crop.left / fmtinfo->hsub * fmtinfo->bpp[1] / 8; mem.addr[1] += offset; mem.addr[2] += offset; } diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index a7962ca2ac8e..bc9ac6002e25 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -153,6 +153,24 @@ struct imon_context { const struct imon_usb_dev_descr *dev_descr; /* device description with key */ /* table for front panels */ + /* + * Fields for deferring free_imon_context(). + * + * Since reference to "struct imon_context" is stored into + * "struct file"->private_data, we need to remember + * how many file descriptors might access this "struct imon_context". + */ + refcount_t users; + /* + * Use a flag for telling display_open()/vfd_write()/lcd_write() that + * imon_disconnect() was already called. + */ + bool disconnected; + /* + * We need to wait for RCU grace period in order to allow + * display_open() to safely check ->disconnected and increment ->users. + */ + struct rcu_head rcu; }; #define TOUCH_TIMEOUT (HZ/30) @@ -160,18 +178,18 @@ struct imon_context { /* vfd character device file operations */ static const struct file_operations vfd_fops = { .owner = THIS_MODULE, - .open = &display_open, - .write = &vfd_write, - .release = &display_close, + .open = display_open, + .write = vfd_write, + .release = display_close, .llseek = noop_llseek, }; /* lcd character device file operations */ static const struct file_operations lcd_fops = { .owner = THIS_MODULE, - .open = &display_open, - .write = &lcd_write, - .release = &display_close, + .open = display_open, + .write = lcd_write, + .release = display_close, .llseek = noop_llseek, }; @@ -439,9 +457,6 @@ static struct usb_driver imon_driver = { .id_table = imon_usb_id_table, }; -/* to prevent races between open() and disconnect(), probing, etc */ -static DEFINE_MUTEX(driver_lock); - /* Module bookkeeping bits */ MODULE_AUTHOR(MOD_AUTHOR); MODULE_DESCRIPTION(MOD_DESC); @@ -481,9 +496,11 @@ static void free_imon_context(struct imon_context *ictx) struct device *dev = ictx->dev; usb_free_urb(ictx->tx_urb); + WARN_ON(ictx->dev_present_intf0); usb_free_urb(ictx->rx_urb_intf0); + WARN_ON(ictx->dev_present_intf1); usb_free_urb(ictx->rx_urb_intf1); - kfree(ictx); + kfree_rcu(ictx, rcu); dev_dbg(dev, "%s: iMON context freed\n", __func__); } @@ -499,9 +516,6 @@ static int display_open(struct inode *inode, struct file *file) int subminor; int retval = 0; - /* prevent races with disconnect */ - mutex_lock(&driver_lock); - subminor = iminor(inode); interface = usb_find_interface(&imon_driver, subminor); if (!interface) { @@ -509,13 +523,16 @@ static int display_open(struct inode *inode, struct file *file) retval = -ENODEV; goto exit; } - ictx = usb_get_intfdata(interface); - if (!ictx) { + rcu_read_lock(); + ictx = usb_get_intfdata(interface); + if (!ictx || ictx->disconnected || !refcount_inc_not_zero(&ictx->users)) { + rcu_read_unlock(); pr_err("no context found for minor %d\n", subminor); retval = -ENODEV; goto exit; } + rcu_read_unlock(); mutex_lock(&ictx->lock); @@ -533,8 +550,10 @@ static int display_open(struct inode *inode, struct file *file) mutex_unlock(&ictx->lock); + if (retval && refcount_dec_and_test(&ictx->users)) + free_imon_context(ictx); + exit: - mutex_unlock(&driver_lock); return retval; } @@ -544,16 +563,9 @@ static int display_open(struct inode *inode, struct file *file) */ static int display_close(struct inode *inode, struct file *file) { - struct imon_context *ictx = NULL; + struct imon_context *ictx = file->private_data; int retval = 0; - ictx = file->private_data; - - if (!ictx) { - pr_err("no context for device\n"); - return -ENODEV; - } - mutex_lock(&ictx->lock); if (!ictx->display_supported) { @@ -568,6 +580,8 @@ static int display_close(struct inode *inode, struct file *file) } mutex_unlock(&ictx->lock); + if (refcount_dec_and_test(&ictx->users)) + free_imon_context(ictx); return retval; } @@ -937,15 +951,12 @@ static ssize_t vfd_write(struct file *file, const char __user *buf, int offset; int seq; int retval = 0; - struct imon_context *ictx; + struct imon_context *ictx = file->private_data; static const unsigned char vfd_packet6[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF }; - ictx = file->private_data; - if (!ictx) { - pr_err_ratelimited("no context for device\n"); + if (ictx->disconnected) return -ENODEV; - } mutex_lock(&ictx->lock); @@ -1021,13 +1032,10 @@ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { int retval = 0; - struct imon_context *ictx; + struct imon_context *ictx = file->private_data; - ictx = file->private_data; - if (!ictx) { - pr_err_ratelimited("no context for device\n"); + if (ictx->disconnected) return -ENODEV; - } mutex_lock(&ictx->lock); @@ -2405,7 +2413,6 @@ static int imon_probe(struct usb_interface *interface, int ifnum, sysfs_err; int ret = 0; struct imon_context *ictx = NULL; - struct imon_context *first_if_ctx = NULL; u16 vendor, product; usbdev = usb_get_dev(interface_to_usbdev(interface)); @@ -2417,17 +2424,12 @@ static int imon_probe(struct usb_interface *interface, dev_dbg(dev, "%s: found iMON device (%04x:%04x, intf%d)\n", __func__, vendor, product, ifnum); - /* prevent races probing devices w/multiple interfaces */ - mutex_lock(&driver_lock); - first_if = usb_ifnum_to_if(usbdev, 0); if (!first_if) { ret = -ENODEV; goto fail; } - first_if_ctx = usb_get_intfdata(first_if); - if (ifnum == 0) { ictx = imon_init_intf0(interface, id); if (!ictx) { @@ -2435,9 +2437,11 @@ static int imon_probe(struct usb_interface *interface, ret = -ENODEV; goto fail; } + refcount_set(&ictx->users, 1); } else { /* this is the secondary interface on the device */ + struct imon_context *first_if_ctx = usb_get_intfdata(first_if); /* fail early if first intf failed to register */ if (!first_if_ctx) { @@ -2451,14 +2455,13 @@ static int imon_probe(struct usb_interface *interface, ret = -ENODEV; goto fail; } + refcount_inc(&ictx->users); } usb_set_intfdata(interface, ictx); if (ifnum == 0) { - mutex_lock(&ictx->lock); - if (product == 0xffdc && ictx->rf_device) { sysfs_err = sysfs_create_group(&interface->dev.kobj, &imon_rf_attr_group); @@ -2469,21 +2472,17 @@ static int imon_probe(struct usb_interface *interface, if (ictx->display_supported) imon_init_display(ictx, interface); - - mutex_unlock(&ictx->lock); } dev_info(dev, "iMON device (%04x:%04x, intf%d) on usb<%d:%d> initialized\n", vendor, product, ifnum, usbdev->bus->busnum, usbdev->devnum); - mutex_unlock(&driver_lock); usb_put_dev(usbdev); return 0; fail: - mutex_unlock(&driver_lock); usb_put_dev(usbdev); dev_err(dev, "unable to register, err %d\n", ret); @@ -2499,10 +2498,8 @@ static void imon_disconnect(struct usb_interface *interface) struct device *dev; int ifnum; - /* prevent races with multi-interface device probing and display_open */ - mutex_lock(&driver_lock); - ictx = usb_get_intfdata(interface); + ictx->disconnected = true; dev = ictx->dev; ifnum = interface->cur_altsetting->desc.bInterfaceNumber; @@ -2543,11 +2540,9 @@ static void imon_disconnect(struct usb_interface *interface) } } - if (!ictx->dev_present_intf0 && !ictx->dev_present_intf1) + if (refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); - mutex_unlock(&driver_lock); - dev_dbg(dev, "%s: iMON device (intf%d) disconnected\n", __func__, ifnum); } diff --git a/drivers/media/test-drivers/vim2m.c b/drivers/media/test-drivers/vim2m.c index a776bb8e0e09..a24624353f9e 100644 --- a/drivers/media/test-drivers/vim2m.c +++ b/drivers/media/test-drivers/vim2m.c @@ -1325,12 +1325,6 @@ static int vim2m_probe(struct platform_device *pdev) vfd->lock = &dev->dev_mutex; vfd->v4l2_dev = &dev->v4l2_dev; - ret = video_register_device(vfd, VFL_TYPE_VIDEO, 0); - if (ret) { - v4l2_err(&dev->v4l2_dev, "Failed to register video device\n"); - goto error_v4l2; - } - video_set_drvdata(vfd, dev); v4l2_info(&dev->v4l2_dev, "Device registered as /dev/video%d\n", vfd->num); @@ -1353,12 +1347,20 @@ static int vim2m_probe(struct platform_device *pdev) media_device_init(&dev->mdev); dev->mdev.ops = &m2m_media_ops; dev->v4l2_dev.mdev = &dev->mdev; +#endif + ret = video_register_device(vfd, VFL_TYPE_VIDEO, 0); + if (ret) { + v4l2_err(&dev->v4l2_dev, "Failed to register video device\n"); + goto error_m2m; + } + +#ifdef CONFIG_MEDIA_CONTROLLER ret = v4l2_m2m_register_media_controller(dev->m2m_dev, vfd, MEDIA_ENT_F_PROC_VIDEO_SCALER); if (ret) { v4l2_err(&dev->v4l2_dev, "Failed to init mem2mem media controller\n"); - goto error_dev; + goto error_v4l2; } ret = media_device_register(&dev->mdev); @@ -1373,11 +1375,13 @@ static int vim2m_probe(struct platform_device *pdev) error_m2m_mc: v4l2_m2m_unregister_media_controller(dev->m2m_dev); #endif -error_dev: +error_v4l2: video_unregister_device(&dev->vfd); /* vim2m_device_release called by video_unregister_device to release various objects */ return ret; -error_v4l2: +error_m2m: + v4l2_m2m_release(dev->m2m_dev); +error_dev: v4l2_device_unregister(&dev->v4l2_dev); error_free: kfree(dev); diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index 3915d551d59e..fccd1798445d 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -2569,6 +2569,11 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, } while (0); mutex_unlock(&pvr2_unit_mtx); + INIT_WORK(&hdw->workpoll, pvr2_hdw_worker_poll); + + if (hdw->unit_number == -1) + goto fail; + cnt1 = 0; cnt2 = scnprintf(hdw->name+cnt1,sizeof(hdw->name)-cnt1,"pvrusb2"); cnt1 += cnt2; @@ -2580,8 +2585,6 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, if (cnt1 >= sizeof(hdw->name)) cnt1 = sizeof(hdw->name)-1; hdw->name[cnt1] = 0; - INIT_WORK(&hdw->workpoll,pvr2_hdw_worker_poll); - pvr2_trace(PVR2_TRACE_INIT,"Driver unit number is %d, name is %s", hdw->unit_number,hdw->name); diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c index 753b8a99e08f..b40a2b904ace 100644 --- a/drivers/media/usb/uvc/uvc_v4l2.c +++ b/drivers/media/usb/uvc/uvc_v4l2.c @@ -863,29 +863,31 @@ static int uvc_ioctl_enum_input(struct file *file, void *fh, struct uvc_video_chain *chain = handle->chain; const struct uvc_entity *selector = chain->selector; struct uvc_entity *iterm = NULL; + struct uvc_entity *it; u32 index = input->index; - int pin = 0; if (selector == NULL || (chain->dev->quirks & UVC_QUIRK_IGNORE_SELECTOR_UNIT)) { if (index != 0) return -EINVAL; - list_for_each_entry(iterm, &chain->entities, chain) { - if (UVC_ENTITY_IS_ITERM(iterm)) + list_for_each_entry(it, &chain->entities, chain) { + if (UVC_ENTITY_IS_ITERM(it)) { + iterm = it; break; + } } - pin = iterm->id; } else if (index < selector->bNrInPins) { - pin = selector->baSourceID[index]; - list_for_each_entry(iterm, &chain->entities, chain) { - if (!UVC_ENTITY_IS_ITERM(iterm)) + list_for_each_entry(it, &chain->entities, chain) { + if (!UVC_ENTITY_IS_ITERM(it)) continue; - if (iterm->id == pin) + if (it->id == selector->baSourceID[index]) { + iterm = it; break; + } } } - if (iterm == NULL || iterm->id != pin) + if (iterm == NULL) return -EINVAL; memset(input, 0, sizeof(*input)); diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c index 3d230f07eaf2..5cec30d2dc7d 100644 --- a/drivers/memory/samsung/exynos5422-dmc.c +++ b/drivers/memory/samsung/exynos5422-dmc.c @@ -1192,33 +1192,39 @@ static int of_get_dram_timings(struct exynos5_dmc *dmc) dmc->timing_row = devm_kmalloc_array(dmc->dev, TIMING_COUNT, sizeof(u32), GFP_KERNEL); - if (!dmc->timing_row) - return -ENOMEM; + if (!dmc->timing_row) { + ret = -ENOMEM; + goto put_node; + } dmc->timing_data = devm_kmalloc_array(dmc->dev, TIMING_COUNT, sizeof(u32), GFP_KERNEL); - if (!dmc->timing_data) - return -ENOMEM; + if (!dmc->timing_data) { + ret = -ENOMEM; + goto put_node; + } dmc->timing_power = devm_kmalloc_array(dmc->dev, TIMING_COUNT, sizeof(u32), GFP_KERNEL); - if (!dmc->timing_power) - return -ENOMEM; + if (!dmc->timing_power) { + ret = -ENOMEM; + goto put_node; + } dmc->timings = of_lpddr3_get_ddr_timings(np_ddr, dmc->dev, DDR_TYPE_LPDDR3, &dmc->timings_arr_size); if (!dmc->timings) { - of_node_put(np_ddr); dev_warn(dmc->dev, "could not get timings from DT\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } dmc->min_tck = of_lpddr3_get_min_tck(np_ddr, dmc->dev); if (!dmc->min_tck) { - of_node_put(np_ddr); dev_warn(dmc->dev, "could not get tck from DT\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } /* Sorted array of OPPs with frequency ascending */ @@ -1232,13 +1238,14 @@ static int of_get_dram_timings(struct exynos5_dmc *dmc) clk_period_ps); } - of_node_put(np_ddr); /* Take the highest frequency's timings as 'bypass' */ dmc->bypass_timing_row = dmc->timing_row[idx - 1]; dmc->bypass_timing_data = dmc->timing_data[idx - 1]; dmc->bypass_timing_power = dmc->timing_power[idx - 1]; +put_node: + of_node_put(np_ddr); return ret; } @@ -1327,7 +1334,6 @@ static int exynos5_dmc_init_clks(struct exynos5_dmc *dmc) */ static int exynos5_performance_counters_init(struct exynos5_dmc *dmc) { - int counters_size; int ret, i; dmc->num_counters = devfreq_event_get_edev_count(dmc->dev, @@ -1337,8 +1343,8 @@ static int exynos5_performance_counters_init(struct exynos5_dmc *dmc) return dmc->num_counters; } - counters_size = sizeof(struct devfreq_event_dev) * dmc->num_counters; - dmc->counter = devm_kzalloc(dmc->dev, counters_size, GFP_KERNEL); + dmc->counter = devm_kcalloc(dmc->dev, dmc->num_counters, + sizeof(*dmc->counter), GFP_KERNEL); if (!dmc->counter) return -ENOMEM; diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c index e5c8bc998eb4..965820481f1e 100644 --- a/drivers/mfd/davinci_voicecodec.c +++ b/drivers/mfd/davinci_voicecodec.c @@ -46,14 +46,12 @@ static int __init davinci_vc_probe(struct platform_device *pdev) } clk_enable(davinci_vc->clk); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - fifo_base = (dma_addr_t)res->start; - davinci_vc->base = devm_ioremap_resource(&pdev->dev, res); + davinci_vc->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(davinci_vc->base)) { ret = PTR_ERR(davinci_vc->base); goto fail; } + fifo_base = (dma_addr_t)res->start; davinci_vc->regmap = devm_regmap_init_mmio(&pdev->dev, davinci_vc->base, diff --git a/drivers/mfd/ipaq-micro.c b/drivers/mfd/ipaq-micro.c index e92eeeb67a98..4cd5ecc72211 100644 --- a/drivers/mfd/ipaq-micro.c +++ b/drivers/mfd/ipaq-micro.c @@ -403,7 +403,7 @@ static int __init micro_probe(struct platform_device *pdev) micro_reset_comm(micro); irq = platform_get_irq(pdev, 0); - if (!irq) + if (irq < 0) return -EINVAL; ret = devm_request_irq(&pdev->dev, irq, micro_serial_isr, IRQF_SHARED, "ipaq-micro", diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index d6cd5537126c..69f9b0336410 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -232,9 +232,9 @@ static int ssc_probe(struct platform_device *pdev) clk_disable_unprepare(ssc->clk); ssc->irq = platform_get_irq(pdev, 0); - if (!ssc->irq) { + if (ssc->irq < 0) { dev_dbg(&pdev->dev, "could not get irq\n"); - return -ENXIO; + return ssc->irq; } mutex_lock(&user_lock); diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index 59eda55d92a3..f150d8769f19 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -631,16 +631,20 @@ static int rtsx_usb_probe(struct usb_interface *intf, ucr->pusb_dev = usb_dev; - ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE, - GFP_KERNEL, &ucr->iobuf_dma); - if (!ucr->iobuf) + ucr->cmd_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->cmd_buf) return -ENOMEM; + ucr->rsp_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->rsp_buf) { + ret = -ENOMEM; + goto out_free_cmd_buf; + } + usb_set_intfdata(intf, ucr); ucr->vendor_id = id->idVendor; ucr->product_id = id->idProduct; - ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf; mutex_init(&ucr->dev_mutex); @@ -667,8 +671,12 @@ static int rtsx_usb_probe(struct usb_interface *intf, return 0; out_init_fail: - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); + usb_set_intfdata(ucr->pusb_intf, NULL); + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; +out_free_cmd_buf: + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; return ret; } @@ -681,8 +689,12 @@ static void rtsx_usb_disconnect(struct usb_interface *intf) mfd_remove_devices(&intf->dev); usb_set_intfdata(ucr->pusb_intf, NULL); - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); + + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; + + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; } #ifdef CONFIG_PM diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index d0471fec37fb..65f24b6150aa 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1349,17 +1349,18 @@ static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, struct fastrpc_req_munmap *req) { struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; - struct fastrpc_buf *buf, *b; + struct fastrpc_buf *buf = NULL, *iter, *b; struct fastrpc_munmap_req_msg req_msg; struct device *dev = fl->sctx->dev; int err; u32 sc; spin_lock(&fl->lock); - list_for_each_entry_safe(buf, b, &fl->mmaps, node) { - if ((buf->raddr == req->vaddrout) && (buf->size == req->size)) + list_for_each_entry_safe(iter, b, &fl->mmaps, node) { + if ((iter->raddr == req->vaddrout) && (iter->size == req->size)) { + buf = iter; break; - buf = NULL; + } } spin_unlock(&fl->lock); diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index a337f97b30e2..d39b8139b096 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -231,6 +231,11 @@ void lkdtm_ARRAY_BOUNDS(void) not_checked = kmalloc(sizeof(*not_checked) * 2, GFP_KERNEL); checked = kmalloc(sizeof(*checked) * 2, GFP_KERNEL); + if (!not_checked || !checked) { + kfree(not_checked); + kfree(checked); + return; + } pr_info("Array access within bounds ...\n"); /* For both, touch all bytes in the actual member size. */ diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c index d173d6175c87..8e01c256a9cb 100644 --- a/drivers/misc/lkdtm/usercopy.c +++ b/drivers/misc/lkdtm/usercopy.c @@ -30,12 +30,12 @@ static const unsigned char test_text[] = "This is a test.\n"; */ static noinline unsigned char *trick_compiler(unsigned char *stack) { - return stack + 0; + return stack + unconst; } static noinline unsigned char *do_usercopy_stack_callee(int value) { - unsigned char buf[32]; + unsigned char buf[128]; int i; /* Exercise stack to avoid everything living in registers. */ @@ -43,7 +43,12 @@ static noinline unsigned char *do_usercopy_stack_callee(int value) buf[i] = value & 0xff; } - return trick_compiler(buf); + /* + * Put the target buffer in the middle of stack allocation + * so that we don't step on future stack users regardless + * of stack growth direction. + */ + return trick_compiler(&buf[(128/2)-32]); } static noinline void do_usercopy_stack(bool to_user, bool bad_frame) @@ -66,6 +71,12 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame) bad_stack -= sizeof(unsigned long); } +#ifdef ARCH_HAS_CURRENT_STACK_POINTER + pr_info("stack : %px\n", (void *)current_stack_pointer); +#endif + pr_info("good_stack: %px-%px\n", good_stack, good_stack + sizeof(good_stack)); + pr_info("bad_stack : %px-%px\n", bad_stack, bad_stack + sizeof(good_stack)); + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, 0); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index d81d75a20b8f..afb2e78df4d6 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -109,6 +109,8 @@ #define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */ #define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */ +#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index a738253dbd05..5324b65d0d29 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -115,6 +115,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 4d1b44de1492..c742ab02ae18 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -558,7 +558,9 @@ int ocxl_file_register_afu(struct ocxl_afu *afu) err_unregister: ocxl_sysfs_unregister_afu(info); // safe to call even if register failed + free_minor(info); device_unregister(&info->dev); + return rc; err_put: ocxl_afu_put(afu); free_minor(info); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 14dddf05f0ac..38c181294e98 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1452,8 +1452,7 @@ void mmc_blk_cqe_recovery(struct mmc_queue *mq) err = mmc_cqe_recovery(host); if (err) mmc_blk_reset(mq->blkdata, host, MMC_BLK_CQE_RECOVERY); - else - mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY); + mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY); pr_debug("%s: CQE recovery done\n", mmc_hostname(host)); } diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index a1f92fed2a55..aa3dfb9c1071 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -236,6 +236,26 @@ static int jz4740_mmc_acquire_dma_channels(struct jz4740_mmc_host *host) return PTR_ERR(host->dma_rx); } + /* + * Limit the maximum segment size in any SG entry according to + * the parameters of the DMA engine device. + */ + if (host->dma_tx) { + struct device *dev = host->dma_tx->device->dev; + unsigned int max_seg_size = dma_get_max_seg_size(dev); + + if (max_seg_size < host->mmc->max_seg_size) + host->mmc->max_seg_size = max_seg_size; + } + + if (host->dma_rx) { + struct device *dev = host->dma_rx->device->dev; + unsigned int max_seg_size = dma_get_max_seg_size(dev); + + if (max_seg_size < host->mmc->max_seg_size) + host->mmc->max_seg_size = max_seg_size; + } + return 0; } diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 94e3f72f6405..8c357e3b78d7 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -147,6 +147,8 @@ static int sdhci_o2_get_cd(struct mmc_host *mmc) if (!(sdhci_readw(host, O2_PLL_DLL_WDT_CONTROL1) & O2_PLL_LOCK_STATUS)) sdhci_o2_enable_internal_clock(host); + else + sdhci_o2_wait_card_detect_stable(host); return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT); } diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index a64ea143d185..7cab9d831afb 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -147,6 +147,9 @@ struct sdhci_am654_data { int drv_strength; int strb_sel; u32 flags; + u32 quirks; + +#define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) }; struct sdhci_am654_driver_data { @@ -369,6 +372,21 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg) } } +static void sdhci_am654_reset(struct sdhci_host *host, u8 mask) +{ + u8 ctrl; + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + + sdhci_reset(host, mask); + + if (sdhci_am654->quirks & SDHCI_AM654_QUIRK_FORCE_CDTEST) { + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); + ctrl |= SDHCI_CTRL_CDTEST_INS | SDHCI_CTRL_CDTEST_EN; + sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); + } +} + static int sdhci_am654_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host = mmc_priv(mmc); @@ -500,7 +518,7 @@ static struct sdhci_ops sdhci_j721e_4bit_ops = { .set_clock = sdhci_j721e_4bit_set_clock, .write_b = sdhci_am654_write_b, .irq = sdhci_am654_cqhci_irq, - .reset = sdhci_reset, + .reset = sdhci_am654_reset, }; static const struct sdhci_pltfm_data sdhci_j721e_4bit_pdata = { @@ -719,6 +737,9 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev, device_property_read_u32(dev, "ti,clkbuf-sel", &sdhci_am654->clkbuf_sel); + if (device_property_read_bool(dev, "ti,fails-without-test-cd")) + sdhci_am654->quirks |= SDHCI_AM654_QUIRK_FORCE_CDTEST; + sdhci_get_of_property(pdev); return 0; diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 96a27e06401f..9bd65f3f805c 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -59,6 +59,10 @@ #define CFI_SR_WBASB BIT(3) #define CFI_SR_SLSB BIT(1) +enum cfi_quirks { + CFI_QUIRK_DQ_TRUE_DATA = BIT(0), +}; + static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *); static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); #if !FORCE_WORD_WRITE @@ -432,6 +436,15 @@ static void fixup_s29ns512p_sectors(struct mtd_info *mtd) mtd->name); } +static void fixup_quirks(struct mtd_info *mtd) +{ + struct map_info *map = mtd->priv; + struct cfi_private *cfi = map->fldrv_priv; + + if (cfi->mfr == CFI_MFR_AMD && cfi->id == 0x0c01) + cfi->quirks |= CFI_QUIRK_DQ_TRUE_DATA; +} + /* Used to fix CFI-Tables of chips without Extended Query Tables */ static struct cfi_fixup cfi_nopri_fixup_table[] = { { CFI_MFR_SST, 0x234a, fixup_sst39vf }, /* SST39VF1602 */ @@ -470,6 +483,7 @@ static struct cfi_fixup cfi_fixup_table[] = { #if !FORCE_WORD_WRITE { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers }, #endif + { CFI_MFR_ANY, CFI_ID_ANY, fixup_quirks }, { 0, 0, NULL } }; static struct cfi_fixup jedec_fixup_table[] = { @@ -797,47 +811,11 @@ static struct mtd_info *cfi_amdstd_setup(struct mtd_info *mtd) return NULL; } -/* - * Return true if the chip is ready. - * - * Ready is one of: read mode, query mode, erase-suspend-read mode (in any - * non-suspended sector) and is indicated by no toggle bits toggling. - * - * Note that anything more complicated than checking if no bits are toggling - * (including checking DQ5 for an error status) is tricky to get working - * correctly and is therefore not done (particularly with interleaved chips - * as each chip must be checked independently of the others). - */ -static int __xipram chip_ready(struct map_info *map, struct flchip *chip, - unsigned long addr) -{ - struct cfi_private *cfi = map->fldrv_priv; - map_word d, t; - - if (cfi_use_status_reg(cfi)) { - map_word ready = CMD(CFI_SR_DRB); - /* - * For chips that support status register, check device - * ready bit - */ - cfi_send_gen_cmd(0x70, cfi->addr_unlock1, chip->start, map, cfi, - cfi->device_type, NULL); - d = map_read(map, addr); - - return map_word_andequal(map, d, ready, ready); - } - - d = map_read(map, addr); - t = map_read(map, addr); - - return map_word_equal(map, d, t); -} - /* * Return true if the chip is ready and has the correct value. * * Ready is one of: read mode, query mode, erase-suspend-read mode (in any - * non-suspended sector) and it is indicated by no bits toggling. + * non-suspended sector) and is indicated by no toggle bits toggling. * * Error are indicated by toggling bits or bits held with the wrong value, * or with bits toggling. @@ -846,33 +824,48 @@ static int __xipram chip_ready(struct map_info *map, struct flchip *chip, * (including checking DQ5 for an error status) is tricky to get working * correctly and is therefore not done (particularly with interleaved chips * as each chip must be checked independently of the others). - * */ -static int __xipram chip_good(struct map_info *map, struct flchip *chip, - unsigned long addr, map_word expected) +static int __xipram chip_ready(struct map_info *map, struct flchip *chip, + unsigned long addr, map_word *expected) { struct cfi_private *cfi = map->fldrv_priv; - map_word oldd, curd; + map_word d, t; + int ret; if (cfi_use_status_reg(cfi)) { map_word ready = CMD(CFI_SR_DRB); - /* * For chips that support status register, check device * ready bit */ cfi_send_gen_cmd(0x70, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); - curd = map_read(map, addr); + t = map_read(map, addr); - return map_word_andequal(map, curd, ready, ready); + return map_word_andequal(map, t, ready, ready); } - oldd = map_read(map, addr); - curd = map_read(map, addr); + d = map_read(map, addr); + t = map_read(map, addr); - return map_word_equal(map, oldd, curd) && - map_word_equal(map, curd, expected); + ret = map_word_equal(map, d, t); + + if (!ret || !expected) + return ret; + + return map_word_equal(map, t, *expected); +} + +static int __xipram chip_good(struct map_info *map, struct flchip *chip, + unsigned long addr, map_word *expected) +{ + struct cfi_private *cfi = map->fldrv_priv; + map_word *datum = expected; + + if (cfi->quirks & CFI_QUIRK_DQ_TRUE_DATA) + datum = NULL; + + return chip_ready(map, chip, addr, datum); } static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode) @@ -889,7 +882,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr case FL_STATUS: for (;;) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; if (time_after(jiffies, timeo)) { @@ -927,7 +920,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr chip->state = FL_ERASE_SUSPENDING; chip->erase_suspended = 1; for (;;) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; if (time_after(jiffies, timeo)) { @@ -1458,7 +1451,7 @@ static int do_otp_lock(struct map_info *map, struct flchip *chip, loff_t adr, /* wait for chip to become ready */ timeo = jiffies + msecs_to_jiffies(2); for (;;) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; if (time_after(jiffies, timeo)) { @@ -1694,7 +1687,7 @@ static int __xipram do_write_oneword_once(struct map_info *map, * "chip_good" to avoid the failure due to scheduling. */ if (time_after(jiffies, timeo) && - !chip_good(map, chip, adr, datum)) { + !chip_good(map, chip, adr, &datum)) { xip_enable(map, chip, adr); printk(KERN_WARNING "MTD %s(): software timeout\n", __func__); xip_disable(map, chip, adr); @@ -1702,7 +1695,7 @@ static int __xipram do_write_oneword_once(struct map_info *map, break; } - if (chip_good(map, chip, adr, datum)) { + if (chip_good(map, chip, adr, &datum)) { if (cfi_check_err_status(map, chip, adr)) ret = -EIO; break; @@ -1974,14 +1967,14 @@ static int __xipram do_write_buffer_wait(struct map_info *map, * "chip_good" to avoid the failure due to scheduling. */ if (time_after(jiffies, timeo) && - !chip_good(map, chip, adr, datum)) { + !chip_good(map, chip, adr, &datum)) { pr_err("MTD %s(): software timeout, address:0x%.8lx.\n", __func__, adr); ret = -EIO; break; } - if (chip_good(map, chip, adr, datum)) { + if (chip_good(map, chip, adr, &datum)) { if (cfi_check_err_status(map, chip, adr)) ret = -EIO; break; @@ -2190,7 +2183,7 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip, * If the driver thinks the chip is idle, and no toggle bits * are changing, then the chip is actually idle for sure. */ - if (chip->state == FL_READY && chip_ready(map, chip, adr)) + if (chip->state == FL_READY && chip_ready(map, chip, adr, NULL)) return 0; /* @@ -2207,7 +2200,7 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip, /* wait for the chip to become ready */ for (i = 0; i < jiffies_to_usecs(timeo); i++) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) return 0; udelay(1); @@ -2271,13 +2264,13 @@ static int do_panic_write_oneword(struct map_info *map, struct flchip *chip, map_write(map, datum, adr); for (i = 0; i < jiffies_to_usecs(uWriteTimeout); i++) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; udelay(1); } - if (!chip_good(map, chip, adr, datum) || + if (!chip_ready(map, chip, adr, &datum) || cfi_check_err_status(map, chip, adr)) { /* reset on all failures. */ map_write(map, CMD(0xF0), chip->start); @@ -2419,6 +2412,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) DECLARE_WAITQUEUE(wait, current); int ret; int retry_cnt = 0; + map_word datum = map_word_ff(map); adr = cfi->addr_unlock1; @@ -2473,7 +2467,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip) chip->erase_suspended = 0; } - if (chip_good(map, chip, adr, map_word_ff(map))) { + if (chip_ready(map, chip, adr, &datum)) { if (cfi_check_err_status(map, chip, adr)) ret = -EIO; break; @@ -2518,6 +2512,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, DECLARE_WAITQUEUE(wait, current); int ret; int retry_cnt = 0; + map_word datum = map_word_ff(map); adr += chip->start; @@ -2572,7 +2567,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, chip->erase_suspended = 0; } - if (chip_good(map, chip, adr, map_word_ff(map))) { + if (chip_ready(map, chip, adr, &datum)) { if (cfi_check_err_status(map, chip, adr)) ret = -EIO; break; @@ -2766,7 +2761,7 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map, */ timeo = jiffies + msecs_to_jiffies(2000); /* 2s max (un)locking */ for (;;) { - if (chip_ready(map, chip, adr)) + if (chip_ready(map, chip, adr, NULL)) break; if (time_after(jiffies, timeo)) { diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index b46786cd53e0..4fdb39214a12 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -2983,11 +2983,10 @@ static int cadence_nand_dt_probe(struct platform_device *ofdev) if (IS_ERR(cdns_ctrl->reg)) return PTR_ERR(cdns_ctrl->reg); - res = platform_get_resource(ofdev, IORESOURCE_MEM, 1); - cdns_ctrl->io.dma = res->start; - cdns_ctrl->io.virt = devm_ioremap_resource(&ofdev->dev, res); + cdns_ctrl->io.virt = devm_platform_get_and_ioremap_resource(ofdev, 1, &res); if (IS_ERR(cdns_ctrl->io.virt)) return PTR_ERR(cdns_ctrl->io.virt); + cdns_ctrl->io.dma = res->start; dt->clk = devm_clk_get(cdns_ctrl->dev, "nf_clk"); if (IS_ERR(dt->clk)) diff --git a/drivers/mtd/nand/raw/denali_pci.c b/drivers/mtd/nand/raw/denali_pci.c index 20c085a30adc..de7e722d3826 100644 --- a/drivers/mtd/nand/raw/denali_pci.c +++ b/drivers/mtd/nand/raw/denali_pci.c @@ -74,22 +74,21 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) return ret; } - denali->reg = ioremap(csr_base, csr_len); + denali->reg = devm_ioremap(denali->dev, csr_base, csr_len); if (!denali->reg) { dev_err(&dev->dev, "Spectra: Unable to remap memory region\n"); return -ENOMEM; } - denali->host = ioremap(mem_base, mem_len); + denali->host = devm_ioremap(denali->dev, mem_base, mem_len); if (!denali->host) { dev_err(&dev->dev, "Spectra: ioremap failed!"); - ret = -ENOMEM; - goto out_unmap_reg; + return -ENOMEM; } ret = denali_init(denali); if (ret) - goto out_unmap_host; + return ret; nsels = denali->nbanks; @@ -117,10 +116,6 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) out_remove_denali: denali_remove(denali); -out_unmap_host: - iounmap(denali->host); -out_unmap_reg: - iounmap(denali->reg); return ret; } @@ -129,8 +124,6 @@ static void denali_pci_remove(struct pci_dev *dev) struct denali_controller *denali = pci_get_drvdata(dev); denali_remove(denali); - iounmap(denali->reg); - iounmap(denali->host); } static struct pci_driver denali_pci_driver = { diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 2b26a875a855..e8146a47da12 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -827,6 +827,15 @@ static int spi_nor_write_16bit_sr_and_check(struct spi_nor *nor, u8 sr1) if (ret) return ret; + ret = spi_nor_read_sr(nor, sr_cr); + if (ret) + return ret; + + if (sr1 != sr_cr[0]) { + dev_dbg(nor->dev, "SR: Read back test failed\n"); + return -EIO; + } + if (nor->flags & SNOR_F_NO_READ_CR) return 0; diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 28f55f9cf715..053ab52668e8 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -97,6 +97,33 @@ struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor) return e; } +/* + * has_enough_free_count - whether ubi has enough free pebs to fill fm pools + * @ubi: UBI device description object + * @is_wl_pool: whether UBI is filling wear leveling pool + * + * This helper function checks whether there are enough free pebs (deducted + * by fastmap pebs) to fill fm_pool and fm_wl_pool, above rule works after + * there is at least one of free pebs is filled into fm_wl_pool. + * For wear leveling pool, UBI should also reserve free pebs for bad pebs + * handling, because there maybe no enough free pebs for user volumes after + * producing new bad pebs. + */ +static bool has_enough_free_count(struct ubi_device *ubi, bool is_wl_pool) +{ + int fm_used = 0; // fastmap non anchor pebs. + int beb_rsvd_pebs; + + if (!ubi->free.rb_node) + return false; + + beb_rsvd_pebs = is_wl_pool ? ubi->beb_rsvd_pebs : 0; + if (ubi->fm_wl_pool.size > 0 && !(ubi->ro_mode || ubi->fm_disabled)) + fm_used = ubi->fm_size / ubi->leb_size - 1; + + return ubi->free_count - beb_rsvd_pebs > fm_used; +} + /** * ubi_refill_pools - refills all fastmap PEB pools. * @ubi: UBI device description object @@ -120,21 +147,17 @@ void ubi_refill_pools(struct ubi_device *ubi) wl_tree_add(ubi->fm_anchor, &ubi->free); ubi->free_count++; } - if (ubi->fm_next_anchor) { - wl_tree_add(ubi->fm_next_anchor, &ubi->free); - ubi->free_count++; - } - /* All available PEBs are in ubi->free, now is the time to get + /* + * All available PEBs are in ubi->free, now is the time to get * the best anchor PEBs. */ ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1); - ubi->fm_next_anchor = ubi_wl_get_fm_peb(ubi, 1); for (;;) { enough = 0; if (pool->size < pool->max_size) { - if (!ubi->free.rb_node) + if (!has_enough_free_count(ubi, false)) break; e = wl_get_wle(ubi); @@ -147,8 +170,7 @@ void ubi_refill_pools(struct ubi_device *ubi) enough++; if (wl_pool->size < wl_pool->max_size) { - if (!ubi->free.rb_node || - (ubi->free_count - ubi->beb_rsvd_pebs < 5)) + if (!has_enough_free_count(ubi, true)) break; e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); @@ -286,20 +308,26 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) int ubi_ensure_anchor_pebs(struct ubi_device *ubi) { struct ubi_work *wrk; + struct ubi_wl_entry *anchor; spin_lock(&ubi->wl_lock); - /* Do we have a next anchor? */ - if (!ubi->fm_next_anchor) { - ubi->fm_next_anchor = ubi_wl_get_fm_peb(ubi, 1); - if (!ubi->fm_next_anchor) - /* Tell wear leveling to produce a new anchor PEB */ - ubi->fm_do_produce_anchor = 1; + /* Do we already have an anchor? */ + if (ubi->fm_anchor) { + spin_unlock(&ubi->wl_lock); + return 0; } - /* Do wear leveling to get a new anchor PEB or check the - * existing next anchor candidate. - */ + /* See if we can find an anchor PEB on the list of free PEBs */ + anchor = ubi_wl_get_fm_peb(ubi, 1); + if (anchor) { + ubi->fm_anchor = anchor; + spin_unlock(&ubi->wl_lock); + return 0; + } + + ubi->fm_do_produce_anchor = 1; + /* No luck, trigger wear leveling to produce a new anchor PEB. */ if (ubi->wl_scheduled) { spin_unlock(&ubi->wl_lock); return 0; @@ -381,11 +409,6 @@ static void ubi_fastmap_close(struct ubi_device *ubi) ubi->fm_anchor = NULL; } - if (ubi->fm_next_anchor) { - return_unused_peb(ubi, ubi->fm_next_anchor); - ubi->fm_next_anchor = NULL; - } - if (ubi->fm) { for (i = 0; i < ubi->fm->used_blocks; i++) kfree(ubi->fm->e[i]); diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 6b5f1ffd961b..6e95c4b1473e 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1230,17 +1230,6 @@ static int ubi_write_fastmap(struct ubi_device *ubi, fm_pos += sizeof(*fec); ubi_assert(fm_pos <= ubi->fm_size); } - if (ubi->fm_next_anchor) { - fec = (struct ubi_fm_ec *)(fm_raw + fm_pos); - - fec->pnum = cpu_to_be32(ubi->fm_next_anchor->pnum); - set_seen(ubi, ubi->fm_next_anchor->pnum, seen_pebs); - fec->ec = cpu_to_be32(ubi->fm_next_anchor->ec); - - free_peb_count++; - fm_pos += sizeof(*fec); - ubi_assert(fm_pos <= ubi->fm_size); - } fmh->free_peb_count = cpu_to_be32(free_peb_count); ubi_for_each_used_peb(ubi, wl_e, tmp_rb) { diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index c2da77163f94..da0bee13fe7f 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -491,8 +491,7 @@ struct ubi_debug_info { * @fm_work: fastmap work queue * @fm_work_scheduled: non-zero if fastmap work was scheduled * @fast_attach: non-zero if UBI was attached by fastmap - * @fm_anchor: The new anchor PEB used during fastmap update - * @fm_next_anchor: An anchor PEB candidate for the next time fastmap is updated + * @fm_anchor: The next anchor PEB to use for fastmap * @fm_do_produce_anchor: If true produce an anchor PEB in wl * * @used: RB-tree of used physical eraseblocks @@ -603,7 +602,6 @@ struct ubi_device { int fm_work_scheduled; int fast_attach; struct ubi_wl_entry *fm_anchor; - struct ubi_wl_entry *fm_next_anchor; int fm_do_produce_anchor; /* Wear-leveling sub-system's stuff */ diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 1bc7b3a05604..6ea95ade4ca6 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -309,7 +309,6 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) ubi->volumes[vol_id] = NULL; ubi->vol_count -= 1; spin_unlock(&ubi->volumes_lock); - ubi_eba_destroy_table(eba_tbl); out_acc: spin_lock(&ubi->volumes_lock); ubi->rsvd_pebs -= vol->reserved_pebs; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 7847de75a74c..820b5c1c8e8e 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -688,16 +688,16 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, #ifdef CONFIG_MTD_UBI_FASTMAP e1 = find_anchor_wl_entry(&ubi->used); - if (e1 && ubi->fm_next_anchor && - (ubi->fm_next_anchor->ec - e1->ec >= UBI_WL_THRESHOLD)) { + if (e1 && ubi->fm_anchor && + (ubi->fm_anchor->ec - e1->ec >= UBI_WL_THRESHOLD)) { ubi->fm_do_produce_anchor = 1; - /* fm_next_anchor is no longer considered a good anchor - * candidate. + /* + * fm_anchor is no longer considered a good anchor. * NULL assignment also prevents multiple wear level checks * of this PEB. */ - wl_tree_add(ubi->fm_next_anchor, &ubi->free); - ubi->fm_next_anchor = NULL; + wl_tree_add(ubi->fm_anchor, &ubi->free); + ubi->fm_anchor = NULL; ubi->free_count++; } @@ -1086,12 +1086,13 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk) if (!err) { spin_lock(&ubi->wl_lock); - if (!ubi->fm_disabled && !ubi->fm_next_anchor && + if (!ubi->fm_disabled && !ubi->fm_anchor && e->pnum < UBI_FM_MAX_START) { - /* Abort anchor production, if needed it will be + /* + * Abort anchor production, if needed it will be * enabled again in the wear leveling started below. */ - ubi->fm_next_anchor = e; + ubi->fm_anchor = e; ubi->fm_do_produce_anchor = 0; } else { wl_tree_add(e, &ubi->free); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5be850effd29..72d85214e1b5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -80,7 +80,6 @@ config WIREGUARD select CRYPTO select CRYPTO_LIB_CURVE25519 select CRYPTO_LIB_CHACHA20POLY1305 - select CRYPTO_LIB_BLAKE2S select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT select CRYPTO_POLY1305_X86_64 if X86 && 64BIT select CRYPTO_BLAKE2S_X86 if X86 && 64BIT diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index c2cef7ba2671..325b20729d8b 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2209,7 +2209,8 @@ void bond_3ad_unbind_slave(struct slave *slave) temp_aggregator->num_of_ports--; if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; - ad_clear_agg(temp_aggregator); + if (temp_aggregator->num_of_ports == 0) + ad_clear_agg(temp_aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 0436aef9c9ef..152f76f86927 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1279,12 +1279,12 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled) return res; if (rlb_enabled) { - bond->alb_info.rlb_enabled = 1; res = rlb_initialize(bond); if (res) { tlb_deinitialize(bond); return res; } + bond->alb_info.rlb_enabled = 1; } else { bond->alb_info.rlb_enabled = 0; } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index cbeb69bca0bb..9c4b45341fd2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3368,9 +3368,11 @@ static void bond_activebackup_arp_mon(struct bonding *bond) if (!rtnl_trylock()) return; - if (should_notify_peers) + if (should_notify_peers) { + bond->send_peer_notif--; call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + } if (should_notify_rtnl) { bond_slave_state_notify(bond); bond_slave_link_notify(bond); diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 47a6d62b7511..a701932f5cc2 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -723,13 +723,21 @@ static int cfv_probe(struct virtio_device *vdev) /* Carrier is off until netdevice is opened */ netif_carrier_off(netdev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + /* register Netdev */ - err = register_netdev(netdev); + err = register_netdevice(netdev); if (err) { + rtnl_unlock(); dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err); goto err; } + virtio_device_ready(vdev); + + rtnl_unlock(); + debugfs_init(cfv); return 0; diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 4923f9ac6a43..3794a7b2c64e 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1659,7 +1659,6 @@ static int grcan_probe(struct platform_device *ofdev) */ sysid_parent = of_find_node_by_path("/ambapp0"); if (sysid_parent) { - of_node_get(sysid_parent); err = of_property_read_u32(sysid_parent, "systemid", &sysid); if (!err && ((sysid & GRLIB_VERSION_MASK) >= GRCAN_TXBUG_SAFE_GRLIB_VERSION)) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index fa1246e39980..766dbd19bba6 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -426,7 +426,7 @@ struct mcp251xfd_hw_tef_obj { /* The tx_obj_raw version is used in spi async, i.e. without * regmap. We have to take care of endianness ourselves. */ -struct mcp251xfd_hw_tx_obj_raw { +struct __packed mcp251xfd_hw_tx_obj_raw { __le32 id; __le32 flags; u8 data[sizeof_field(struct canfd_frame, data)]; diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index e023c401f4f7..1bfc497da9ac 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -184,6 +184,8 @@ struct gs_can { struct usb_anchor tx_submitted; atomic_t active_tx_urbs; + void *rxbuf[GS_MAX_RX_URBS]; + dma_addr_t rxbuf_dma[GS_MAX_RX_URBS]; }; /* usb interface struct */ @@ -592,6 +594,7 @@ static int gs_can_open(struct net_device *netdev) for (i = 0; i < GS_MAX_RX_URBS; i++) { struct urb *urb; u8 *buf; + dma_addr_t buf_dma; /* alloc rx urb */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -602,7 +605,7 @@ static int gs_can_open(struct net_device *netdev) buf = usb_alloc_coherent(dev->udev, sizeof(struct gs_host_frame), GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); @@ -610,6 +613,8 @@ static int gs_can_open(struct net_device *netdev) return -ENOMEM; } + urb->transfer_dma = buf_dma; + /* fill, anchor, and submit rx urb */ usb_fill_bulk_urb(urb, dev->udev, @@ -633,10 +638,17 @@ static int gs_can_open(struct net_device *netdev) rc); usb_unanchor_urb(urb); + usb_free_coherent(dev->udev, + sizeof(struct gs_host_frame), + buf, + buf_dma); usb_free_urb(urb); break; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + /* Drop reference, * USB core will take care of freeing it */ @@ -701,13 +713,20 @@ static int gs_can_close(struct net_device *netdev) int rc; struct gs_can *dev = netdev_priv(netdev); struct gs_usb *parent = dev->parent; + unsigned int i; netif_stop_queue(netdev); /* Stop polling */ parent->active_channels--; - if (!parent->active_channels) + if (!parent->active_channels) { usb_kill_anchored_urbs(&parent->rx_submitted); + for (i = 0; i < GS_MAX_RX_URBS; i++) + usb_free_coherent(dev->udev, + sizeof(struct gs_host_frame), + dev->rxbuf[i], + dev->rxbuf_dma[i]); + } /* Stop sending URBs */ usb_kill_anchored_urbs(&dev->tx_submitted); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h index 390b6bde883c..61e67986b625 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h @@ -35,9 +35,10 @@ #define KVASER_USB_RX_BUFFER_SIZE 3072 #define KVASER_USB_MAX_NET_DEVICES 5 -/* USB devices features */ -#define KVASER_USB_HAS_SILENT_MODE BIT(0) -#define KVASER_USB_HAS_TXRX_ERRORS BIT(1) +/* Kvaser USB device quirks */ +#define KVASER_USB_QUIRK_HAS_SILENT_MODE BIT(0) +#define KVASER_USB_QUIRK_HAS_TXRX_ERRORS BIT(1) +#define KVASER_USB_QUIRK_IGNORE_CLK_FREQ BIT(2) /* Device capabilities */ #define KVASER_USB_CAP_BERR_CAP 0x01 @@ -65,12 +66,7 @@ struct kvaser_usb_dev_card_data_hydra { struct kvaser_usb_dev_card_data { u32 ctrlmode_supported; u32 capabilities; - union { - struct { - enum kvaser_usb_leaf_family family; - } leaf; - struct kvaser_usb_dev_card_data_hydra hydra; - }; + struct kvaser_usb_dev_card_data_hydra hydra; }; /* Context for an outstanding, not yet ACKed, transmission */ @@ -84,7 +80,7 @@ struct kvaser_usb { struct usb_device *udev; struct usb_interface *intf; struct kvaser_usb_net_priv *nets[KVASER_USB_MAX_NET_DEVICES]; - const struct kvaser_usb_dev_ops *ops; + const struct kvaser_usb_driver_info *driver_info; const struct kvaser_usb_dev_cfg *cfg; struct usb_endpoint_descriptor *bulk_in, *bulk_out; @@ -166,6 +162,12 @@ struct kvaser_usb_dev_ops { int *cmd_len, u16 transid); }; +struct kvaser_usb_driver_info { + u32 quirks; + enum kvaser_usb_leaf_family family; + const struct kvaser_usb_dev_ops *ops; +}; + struct kvaser_usb_dev_cfg { const struct can_clock clock; const unsigned int timestamp_freq; @@ -185,4 +187,7 @@ int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd, int len); int kvaser_usb_can_rx_over_error(struct net_device *netdev); + +extern const struct can_bittiming_const kvaser_usb_flexc_bittiming_const; + #endif /* KVASER_USB_H */ diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 0f1d3e807d63..416763fd1f11 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -79,104 +79,134 @@ #define USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID 269 #define USB_HYBRID_PRO_CANLIN_PRODUCT_ID 270 -static inline bool kvaser_is_leaf(const struct usb_device_id *id) -{ - return (id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID && - id->idProduct <= USB_CAN_R_PRODUCT_ID) || - (id->idProduct >= USB_LEAF_LITE_V2_PRODUCT_ID && - id->idProduct <= USB_MINI_PCIE_2HS_PRODUCT_ID); -} +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_hydra = { + .quirks = 0, + .ops = &kvaser_usb_hydra_dev_ops, +}; -static inline bool kvaser_is_usbcan(const struct usb_device_id *id) -{ - return id->idProduct >= USB_USBCAN_REVB_PRODUCT_ID && - id->idProduct <= USB_MEMORATOR_PRODUCT_ID; -} +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_usbcan = { + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | + KVASER_USB_QUIRK_HAS_SILENT_MODE, + .family = KVASER_USBCAN, + .ops = &kvaser_usb_leaf_dev_ops, +}; -static inline bool kvaser_is_hydra(const struct usb_device_id *id) -{ - return id->idProduct >= USB_BLACKBIRD_V2_PRODUCT_ID && - id->idProduct <= USB_HYBRID_PRO_CANLIN_PRODUCT_ID; -} +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf = { + .quirks = KVASER_USB_QUIRK_IGNORE_CLK_FREQ, + .family = KVASER_LEAF, + .ops = &kvaser_usb_leaf_dev_ops, +}; + +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err = { + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | + KVASER_USB_QUIRK_IGNORE_CLK_FREQ, + .family = KVASER_LEAF, + .ops = &kvaser_usb_leaf_dev_ops, +}; + +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_listen = { + .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | + KVASER_USB_QUIRK_HAS_SILENT_MODE | + KVASER_USB_QUIRK_IGNORE_CLK_FREQ, + .family = KVASER_LEAF, + .ops = &kvaser_usb_leaf_dev_ops, +}; + +static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = { + .quirks = 0, + .ops = &kvaser_usb_leaf_dev_ops, +}; static const struct usb_device_id kvaser_usb_table[] = { - /* Leaf USB product IDs */ - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID) }, + /* Leaf M32C USB product IDs */ + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_SWC_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LIN_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_LS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_SWC_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_DEVEL_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSHS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_UPRO_HSHS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID) }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_OBDII_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS | - KVASER_USB_HAS_SILENT_MODE }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSLS_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_CH_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_SPRO_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_MERCURY_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_LEAF_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, + + /* Leaf i.MX28 USB product IDs */ + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, /* USBCANII USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID), - .driver_info = KVASER_USB_HAS_TXRX_ERRORS }, + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, /* Minihydra USB product IDs */ - { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID) }, - { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID), + .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { } }; MODULE_DEVICE_TABLE(usb, kvaser_usb_table); @@ -267,6 +297,7 @@ int kvaser_usb_can_rx_over_error(struct net_device *netdev) static void kvaser_usb_read_bulk_callback(struct urb *urb) { struct kvaser_usb *dev = urb->context; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; unsigned int i; @@ -283,8 +314,8 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) goto resubmit_urb; } - dev->ops->dev_read_bulk_callback(dev, urb->transfer_buffer, - urb->actual_length); + ops->dev_read_bulk_callback(dev, urb->transfer_buffer, + urb->actual_length); resubmit_urb: usb_fill_bulk_urb(urb, dev->udev, @@ -378,6 +409,7 @@ static int kvaser_usb_open(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; err = open_candev(netdev); @@ -388,11 +420,11 @@ static int kvaser_usb_open(struct net_device *netdev) if (err) goto error; - err = dev->ops->dev_set_opt_mode(priv); + err = ops->dev_set_opt_mode(priv); if (err) goto error; - err = dev->ops->dev_start_chip(priv); + err = ops->dev_start_chip(priv); if (err) { netdev_warn(netdev, "Cannot start device, error %d\n", err); goto error; @@ -449,22 +481,23 @@ static int kvaser_usb_close(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; netif_stop_queue(netdev); - err = dev->ops->dev_flush_queue(priv); + err = ops->dev_flush_queue(priv); if (err) netdev_warn(netdev, "Cannot flush queue, error %d\n", err); - if (dev->ops->dev_reset_chip) { - err = dev->ops->dev_reset_chip(dev, priv->channel); + if (ops->dev_reset_chip) { + err = ops->dev_reset_chip(dev, priv->channel); if (err) netdev_warn(netdev, "Cannot reset card, error %d\n", err); } - err = dev->ops->dev_stop_chip(priv); + err = ops->dev_stop_chip(priv); if (err) netdev_warn(netdev, "Cannot stop device, error %d\n", err); @@ -503,6 +536,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; + const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; struct net_device_stats *stats = &netdev->stats; struct kvaser_usb_tx_urb_context *context = NULL; struct urb *urb; @@ -545,8 +579,8 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, goto freeurb; } - buf = dev->ops->dev_frame_to_cmd(priv, skb, &context->dlc, &cmd_len, - context->echo_index); + buf = ops->dev_frame_to_cmd(priv, skb, &context->dlc, &cmd_len, + context->echo_index); if (!buf) { stats->tx_dropped++; dev_kfree_skb(skb); @@ -630,15 +664,16 @@ static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev) } } -static int kvaser_usb_init_one(struct kvaser_usb *dev, - const struct usb_device_id *id, int channel) +static int kvaser_usb_init_one(struct kvaser_usb *dev, int channel) { struct net_device *netdev; struct kvaser_usb_net_priv *priv; + const struct kvaser_usb_driver_info *driver_info = dev->driver_info; + const struct kvaser_usb_dev_ops *ops = driver_info->ops; int err; - if (dev->ops->dev_reset_chip) { - err = dev->ops->dev_reset_chip(dev, channel); + if (ops->dev_reset_chip) { + err = ops->dev_reset_chip(dev, channel); if (err) return err; } @@ -667,20 +702,19 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, priv->can.state = CAN_STATE_STOPPED; priv->can.clock.freq = dev->cfg->clock.freq; priv->can.bittiming_const = dev->cfg->bittiming_const; - priv->can.do_set_bittiming = dev->ops->dev_set_bittiming; - priv->can.do_set_mode = dev->ops->dev_set_mode; - if ((id->driver_info & KVASER_USB_HAS_TXRX_ERRORS) || + priv->can.do_set_bittiming = ops->dev_set_bittiming; + priv->can.do_set_mode = ops->dev_set_mode; + if ((driver_info->quirks & KVASER_USB_QUIRK_HAS_TXRX_ERRORS) || (priv->dev->card_data.capabilities & KVASER_USB_CAP_BERR_CAP)) - priv->can.do_get_berr_counter = dev->ops->dev_get_berr_counter; - if (id->driver_info & KVASER_USB_HAS_SILENT_MODE) + priv->can.do_get_berr_counter = ops->dev_get_berr_counter; + if (driver_info->quirks & KVASER_USB_QUIRK_HAS_SILENT_MODE) priv->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY; priv->can.ctrlmode_supported |= dev->card_data.ctrlmode_supported; if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) { priv->can.data_bittiming_const = dev->cfg->data_bittiming_const; - priv->can.do_set_data_bittiming = - dev->ops->dev_set_data_bittiming; + priv->can.do_set_data_bittiming = ops->dev_set_data_bittiming; } netdev->flags |= IFF_ECHO; @@ -711,29 +745,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, struct kvaser_usb *dev; int err; int i; + const struct kvaser_usb_driver_info *driver_info; + const struct kvaser_usb_dev_ops *ops; + + driver_info = (const struct kvaser_usb_driver_info *)id->driver_info; + if (!driver_info) + return -ENODEV; dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; - if (kvaser_is_leaf(id)) { - dev->card_data.leaf.family = KVASER_LEAF; - dev->ops = &kvaser_usb_leaf_dev_ops; - } else if (kvaser_is_usbcan(id)) { - dev->card_data.leaf.family = KVASER_USBCAN; - dev->ops = &kvaser_usb_leaf_dev_ops; - } else if (kvaser_is_hydra(id)) { - dev->ops = &kvaser_usb_hydra_dev_ops; - } else { - dev_err(&intf->dev, - "Product ID (%d) is not a supported Kvaser USB device\n", - id->idProduct); - return -ENODEV; - } - dev->intf = intf; + dev->driver_info = driver_info; + ops = driver_info->ops; - err = dev->ops->dev_setup_endpoints(dev); + err = ops->dev_setup_endpoints(dev); if (err) { dev_err(&intf->dev, "Cannot get usb endpoint(s)"); return err; @@ -747,22 +774,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, dev->card_data.ctrlmode_supported = 0; dev->card_data.capabilities = 0; - err = dev->ops->dev_init_card(dev); + err = ops->dev_init_card(dev); if (err) { dev_err(&intf->dev, "Failed to initialize card, error %d\n", err); return err; } - err = dev->ops->dev_get_software_info(dev); + err = ops->dev_get_software_info(dev); if (err) { dev_err(&intf->dev, "Cannot get software info, error %d\n", err); return err; } - if (dev->ops->dev_get_software_details) { - err = dev->ops->dev_get_software_details(dev); + if (ops->dev_get_software_details) { + err = ops->dev_get_software_details(dev); if (err) { dev_err(&intf->dev, "Cannot get software details, error %d\n", err); @@ -780,14 +807,14 @@ static int kvaser_usb_probe(struct usb_interface *intf, dev_dbg(&intf->dev, "Max outstanding tx = %d URBs\n", dev->max_tx_urbs); - err = dev->ops->dev_get_card_info(dev); + err = ops->dev_get_card_info(dev); if (err) { dev_err(&intf->dev, "Cannot get card info, error %d\n", err); return err; } - if (dev->ops->dev_get_capabilities) { - err = dev->ops->dev_get_capabilities(dev); + if (ops->dev_get_capabilities) { + err = ops->dev_get_capabilities(dev); if (err) { dev_err(&intf->dev, "Cannot get capabilities, error %d\n", err); @@ -797,7 +824,7 @@ static int kvaser_usb_probe(struct usb_interface *intf, } for (i = 0; i < dev->nchannels; i++) { - err = kvaser_usb_init_one(dev, id, i); + err = kvaser_usb_init_one(dev, i); if (err) { kvaser_usb_remove_interfaces(dev); return err; diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 218fadc91155..a7c408acb0c0 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -371,7 +371,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { .brp_inc = 1, }; -static const struct can_bittiming_const kvaser_usb_hydra_flexc_bittiming_c = { +const struct can_bittiming_const kvaser_usb_flexc_bittiming_const = { .name = "kvaser_usb_flex", .tseg1_min = 4, .tseg1_max = 16, @@ -2024,5 +2024,5 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc = { .freq = 24000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_hydra_flexc_bittiming_c, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 8b5d1add899a..0e0403dd0550 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -100,16 +100,6 @@ #define USBCAN_ERROR_STATE_RX_ERROR BIT(1) #define USBCAN_ERROR_STATE_BUSERROR BIT(2) -/* bittiming parameters */ -#define KVASER_USB_TSEG1_MIN 1 -#define KVASER_USB_TSEG1_MAX 16 -#define KVASER_USB_TSEG2_MIN 1 -#define KVASER_USB_TSEG2_MAX 8 -#define KVASER_USB_SJW_MAX 4 -#define KVASER_USB_BRP_MIN 1 -#define KVASER_USB_BRP_MAX 64 -#define KVASER_USB_BRP_INC 1 - /* ctrl modes */ #define KVASER_CTRL_MODE_NORMAL 1 #define KVASER_CTRL_MODE_SILENT 2 @@ -342,48 +332,68 @@ struct kvaser_usb_err_summary { }; }; -static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = { - .name = "kvaser_usb", - .tseg1_min = KVASER_USB_TSEG1_MIN, - .tseg1_max = KVASER_USB_TSEG1_MAX, - .tseg2_min = KVASER_USB_TSEG2_MIN, - .tseg2_max = KVASER_USB_TSEG2_MAX, - .sjw_max = KVASER_USB_SJW_MAX, - .brp_min = KVASER_USB_BRP_MIN, - .brp_max = KVASER_USB_BRP_MAX, - .brp_inc = KVASER_USB_BRP_INC, +static const struct can_bittiming_const kvaser_usb_leaf_m16c_bittiming_const = { + .name = "kvaser_usb_ucii", + .tseg1_min = 4, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 16, + .brp_inc = 1, }; -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_8mhz = { +static const struct can_bittiming_const kvaser_usb_leaf_m32c_bittiming_const = { + .name = "kvaser_usb_leaf", + .tseg1_min = 3, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 2, + .brp_max = 128, + .brp_inc = 2, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_usbcan_dev_cfg = { .clock = { .freq = 8000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, + .bittiming_const = &kvaser_usb_leaf_m16c_bittiming_const, }; -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_16mhz = { +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_m32c_dev_cfg = { .clock = { .freq = 16000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, + .bittiming_const = &kvaser_usb_leaf_m32c_bittiming_const, }; -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_24mhz = { +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_16mhz = { + .clock = { + .freq = 16000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_24mhz = { .clock = { .freq = 24000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_32mhz = { +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_32mhz = { .clock = { .freq = 32000000, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; static void * @@ -405,7 +415,7 @@ kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, sizeof(struct kvaser_cmd_tx_can); cmd->u.tx_can.channel = priv->channel; - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: cmd_tx_can_flags = &cmd->u.tx_can.leaf.flags; break; @@ -525,16 +535,23 @@ static void kvaser_usb_leaf_get_software_info_leaf(struct kvaser_usb *dev, dev->fw_version = le32_to_cpu(softinfo->fw_version); dev->max_tx_urbs = le16_to_cpu(softinfo->max_outstanding_tx); - switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) { - case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK: - dev->cfg = &kvaser_usb_leaf_dev_cfg_16mhz; - break; - case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK: - dev->cfg = &kvaser_usb_leaf_dev_cfg_24mhz; - break; - case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK: - dev->cfg = &kvaser_usb_leaf_dev_cfg_32mhz; - break; + if (dev->driver_info->quirks & KVASER_USB_QUIRK_IGNORE_CLK_FREQ) { + /* Firmware expects bittiming parameters calculated for 16MHz + * clock, regardless of the actual clock + */ + dev->cfg = &kvaser_usb_leaf_m32c_dev_cfg; + } else { + switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) { + case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_16mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_24mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_32mhz; + break; + } } } @@ -551,7 +568,7 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) if (err) return err; - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: kvaser_usb_leaf_get_software_info_leaf(dev, &cmd.u.leaf.softinfo); break; @@ -559,7 +576,7 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) dev->fw_version = le32_to_cpu(cmd.u.usbcan.softinfo.fw_version); dev->max_tx_urbs = le16_to_cpu(cmd.u.usbcan.softinfo.max_outstanding_tx); - dev->cfg = &kvaser_usb_leaf_dev_cfg_8mhz; + dev->cfg = &kvaser_usb_leaf_usbcan_dev_cfg; break; } @@ -598,7 +615,7 @@ static int kvaser_usb_leaf_get_card_info(struct kvaser_usb *dev) dev->nchannels = cmd.u.cardinfo.nchannels; if (dev->nchannels > KVASER_USB_MAX_NET_DEVICES || - (dev->card_data.leaf.family == KVASER_USBCAN && + (dev->driver_info->family == KVASER_USBCAN && dev->nchannels > MAX_USBCAN_NET_DEVICES)) return -EINVAL; @@ -734,7 +751,7 @@ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv, new_state < CAN_STATE_BUS_OFF) priv->can.can_stats.restarts++; - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: if (es->leaf.error_factor) { priv->can.can_stats.bus_error++; @@ -813,7 +830,7 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, } } - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: if (es->leaf.error_factor) { cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; @@ -1005,7 +1022,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, stats = &priv->netdev->stats; if ((cmd->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) && - (dev->card_data.leaf.family == KVASER_LEAF && + (dev->driver_info->family == KVASER_LEAF && cmd->id == CMD_LEAF_LOG_MESSAGE)) { kvaser_usb_leaf_leaf_rx_error(dev, cmd); return; @@ -1021,7 +1038,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, return; } - switch (dev->card_data.leaf.family) { + switch (dev->driver_info->family) { case KVASER_LEAF: rx_data = cmd->u.leaf.rx_can.data; break; @@ -1036,7 +1053,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, return; } - if (dev->card_data.leaf.family == KVASER_LEAF && cmd->id == + if (dev->driver_info->family == KVASER_LEAF && cmd->id == CMD_LEAF_LOG_MESSAGE) { cf->can_id = le32_to_cpu(cmd->u.leaf.log_message.id); if (cf->can_id & KVASER_EXTENDED_FRAME) @@ -1133,14 +1150,14 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, break; case CMD_LEAF_LOG_MESSAGE: - if (dev->card_data.leaf.family != KVASER_LEAF) + if (dev->driver_info->family != KVASER_LEAF) goto warn; kvaser_usb_leaf_rx_can_msg(dev, cmd); break; case CMD_CHIP_STATE_EVENT: case CMD_CAN_ERROR_EVENT: - if (dev->card_data.leaf.family == KVASER_LEAF) + if (dev->driver_info->family == KVASER_LEAF) kvaser_usb_leaf_leaf_rx_error(dev, cmd); else kvaser_usb_leaf_usbcan_rx_error(dev, cmd); @@ -1152,12 +1169,12 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, /* Ignored commands */ case CMD_USBCAN_CLOCK_OVERFLOW_EVENT: - if (dev->card_data.leaf.family != KVASER_USBCAN) + if (dev->driver_info->family != KVASER_USBCAN) goto warn; break; case CMD_FLUSH_QUEUE_REPLY: - if (dev->card_data.leaf.family != KVASER_LEAF) + if (dev->driver_info->family != KVASER_LEAF) goto warn; break; diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 375998263af7..1a3fba352cad 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -239,7 +239,7 @@ static const struct can_bittiming_const xcan_bittiming_const_canfd = { }; /* AXI CANFD Data Bittiming constants as per AXI CANFD 1.0 specs */ -static struct can_bittiming_const xcan_data_bittiming_const_canfd = { +static const struct can_bittiming_const xcan_data_bittiming_const_canfd = { .name = DRIVER_NAME, .tseg1_min = 1, .tseg1_max = 16, @@ -259,20 +259,20 @@ static const struct can_bittiming_const xcan_bittiming_const_canfd2 = { .tseg2_min = 1, .tseg2_max = 128, .sjw_max = 128, - .brp_min = 2, + .brp_min = 1, .brp_max = 256, .brp_inc = 1, }; /* AXI CANFD 2.0 Data Bittiming constants as per AXI CANFD 2.0 spec */ -static struct can_bittiming_const xcan_data_bittiming_const_canfd2 = { +static const struct can_bittiming_const xcan_data_bittiming_const_canfd2 = { .name = DRIVER_NAME, .tseg1_min = 1, .tseg1_max = 32, .tseg2_min = 1, .tseg2_max = 16, .sjw_max = 16, - .brp_min = 2, + .brp_min = 1, .brp_max = 256, .brp_inc = 1, }; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b712b4f27efd..c6563d212476 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -774,6 +774,11 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port, if (duplex == DUPLEX_FULL) reg |= DUPLX_MODE; + if (tx_pause) + reg |= TXFLOW_CNTL; + if (rx_pause) + reg |= RXFLOW_CNTL; + core_writel(priv, reg, offset); } diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 4abae06499a9..70895e480683 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1981,8 +1981,10 @@ static int gswip_gphy_fw_list(struct gswip_priv *priv, for_each_available_child_of_node(gphy_fw_list_np, gphy_fw_np) { err = gswip_gphy_fw_probe(priv, &priv->gphy_fw[i], gphy_fw_np, i); - if (err) + if (err) { + of_node_put(gphy_fw_np); goto remove_gphy; + } i++; } diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c355824ddb81..265620a81f9f 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1952,13 +1952,7 @@ static void mt7531_sgmii_validate(struct mt7530_priv *priv, int port, /* Port5 supports ethier RGMII or SGMII. * Port6 supports SGMII only. */ - switch (port) { - case 5: - if (mt7531_is_rgmii_port(priv, port)) - break; - fallthrough; - case 6: - phylink_set(supported, 1000baseX_Full); + if (port == 6) { phylink_set(supported, 2500baseX_Full); phylink_set(supported, 2500baseT_Full); } @@ -2315,8 +2309,6 @@ static void mt7530_mac_port_validate(struct dsa_switch *ds, int port, unsigned long *supported) { - if (port == 5) - phylink_set(supported, 1000baseX_Full); } static void mt7531_mac_port_validate(struct dsa_switch *ds, int port, @@ -2353,8 +2345,10 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port, } /* This switch only supports 1G full-duplex. */ - if (state->interface != PHY_INTERFACE_MODE_MII) + if (state->interface != PHY_INTERFACE_MODE_MII) { phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + } priv->info->mac_port_validate(ds, port, mask); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index e79a808375fc..7b7a8a74405d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3148,6 +3148,7 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip, */ child = of_get_child_by_name(np, "mdio"); err = mv88e6xxx_mdio_register(chip, child, false); + of_node_put(child); if (err) return err; diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index a7d8d45e0e94..b779f3adbc56 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -163,7 +163,8 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) mdio = mdiobus_alloc(); if (mdio == NULL) { netdev_err(dev, "Error allocating MDIO bus\n"); - return -ENOMEM; + ret = -ENOMEM; + goto put_node; } mdio->name = ALTERA_TSE_RESOURCE_NAME; @@ -180,6 +181,7 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) mdio->id); goto out_free_mdio; } + of_node_put(mdio_node); if (netif_msg_drv(priv)) netdev_info(dev, "MDIO bus %s: created\n", mdio->id); @@ -189,6 +191,8 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) out_free_mdio: mdiobus_free(mdio); mdio = NULL; +put_node: + of_node_put(mdio_node); return ret; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index fc5ea434a27c..a0ce213c473b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -385,7 +385,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev) } } -static int aq_suspend_common(struct device *dev, bool deep) +static int aq_suspend_common(struct device *dev) { struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev)); @@ -398,17 +398,15 @@ static int aq_suspend_common(struct device *dev, bool deep) if (netif_running(nic->ndev)) aq_nic_stop(nic); - if (deep) { - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - aq_nic_set_power(nic); - } + aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); + aq_nic_set_power(nic); rtnl_unlock(); return 0; } -static int atl_resume_common(struct device *dev, bool deep) +static int atl_resume_common(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct aq_nic_s *nic; @@ -421,11 +419,6 @@ static int atl_resume_common(struct device *dev, bool deep) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (deep) { - /* Reinitialize Nic/Vecs objects */ - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - } - if (netif_running(nic->ndev)) { ret = aq_nic_init(nic); if (ret) @@ -450,22 +443,22 @@ static int atl_resume_common(struct device *dev, bool deep) static int aq_pm_freeze(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_suspend_poweroff(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_thaw(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static int aq_pm_resume_restore(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static const struct dev_pm_ops aq_pm_ops = { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 72f8751784c3..e9c6f1fa0b1a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -345,7 +345,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int budget) { struct net_device *ndev = aq_nic_get_ndev(self->aq_nic); - bool is_rsc_completed = true; int err = 0; for (; (self->sw_head != self->hw_head) && budget; @@ -363,12 +362,17 @@ int aq_ring_rx_clean(struct aq_ring_s *self, continue; if (!buff->is_eop) { + unsigned int frag_cnt = 0U; buff_ = buff; do { + bool is_rsc_completed = true; + if (buff_->next >= self->size) { err = -EIO; goto err_exit; } + + frag_cnt++; next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -376,18 +380,17 @@ int aq_ring_rx_clean(struct aq_ring_s *self, next_, self->hw_head); - if (unlikely(!is_rsc_completed)) - break; + if (unlikely(!is_rsc_completed) || + frag_cnt > MAX_SKB_FRAGS) { + err = 0; + goto err_exit; + } buff->is_error |= buff_->is_error; buff->is_cso_err |= buff_->is_cso_err; } while (!buff_->is_eop); - if (!is_rsc_completed) { - err = 0; - goto err_exit; - } if (buff->is_error || (buff->is_lro && buff->is_cso_err)) { buff_ = buff; @@ -445,7 +448,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, ALIGN(hdr_len, sizeof(long))); if (buff->len - hdr_len > 0) { - skb_add_rx_frag(skb, 0, buff->rxdata.page, + skb_add_rx_frag(skb, i++, buff->rxdata.page, buff->rxdata.pg_off + hdr_len, buff->len - hdr_len, AQ_CFG_RX_FRAME_MAX); @@ -454,7 +457,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; - i = 1U; do { next_ = buff_->next; buff_ = &self->buff_ring[next_]; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 9f1b15077e7d..45c17c585d74 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -889,6 +889,13 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self, err = -ENXIO; goto err_exit; } + + /* Validate that the new hw_head_ is reasonable. */ + if (hw_head_ >= ring->size) { + err = -ENXIO; + goto err_exit; + } + ring->hw_head = hw_head_; err = aq_hw_err_from_flags(self); diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile index 7046ad6d3d0e..ac50da49ca77 100644 --- a/drivers/net/ethernet/broadcom/Makefile +++ b/drivers/net/ethernet/broadcom/Makefile @@ -16,3 +16,8 @@ obj-$(CONFIG_BGMAC_BCMA) += bgmac-bcma.o bgmac-bcma-mdio.o obj-$(CONFIG_BGMAC_PLATFORM) += bgmac-platform.o obj-$(CONFIG_SYSTEMPORT) += bcmsysport.o obj-$(CONFIG_BNXT) += bnxt/ + +# FIXME: temporarily silence -Warray-bounds on non W=1+ builds +ifndef KBUILD_EXTRA_WARN +CFLAGS_tg3.o += -Wno-array-bounds +endif diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 1a703b95208b..82d369d9f7a5 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2592,8 +2592,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) device_set_wakeup_capable(&pdev->dev, 1); priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol"); - if (IS_ERR(priv->wol_clk)) - return PTR_ERR(priv->wol_clk); + if (IS_ERR(priv->wol_clk)) { + ret = PTR_ERR(priv->wol_clk); + goto err_deregister_fixed_link; + } /* Set the needed headroom once and for all */ BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8); diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index a5fd161ab5ee..26746197515f 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -323,7 +323,6 @@ static void bgmac_remove(struct bcma_device *core) bcma_mdio_mii_unregister(bgmac->mii_bus); bgmac_enet_remove(bgmac); bcma_set_drvdata(core, NULL); - kfree(bgmac); } static struct bcma_driver bgmac_bcma_driver = { diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index bd13f91efe7c..792c8147c2c4 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1092,7 +1092,6 @@ static void gem_rx_refill(struct macb_queue *queue) /* Make hw descriptor updates visible to CPU */ rmb(); - queue->rx_prepared_head++; desc = macb_rx_desc(queue, entry); if (!queue->rx_skbuff[entry]) { @@ -1131,6 +1130,7 @@ static void gem_rx_refill(struct macb_queue *queue) dma_wmb(); desc->addr &= ~MACB_BIT(RX_USED); } + queue->rx_prepared_head++; } /* Make descriptor updates visible to hardware */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index a262c949ed76..aceb1fd38ddb 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1150,7 +1150,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, fl6.daddr = ip6h->saddr; fl6.fl6_dport = inet_rsk(oreq)->ir_rmt_port; fl6.fl6_sport = htons(inet_rsk(oreq)->ir_num); - security_req_classify_flow(oreq, flowi6_to_flowi(&fl6)); + security_req_classify_flow(oreq, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(sock_net(lsk), lsk, &fl6, NULL); if (IS_ERR(dst)) goto free_sk; @@ -1235,8 +1235,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk, csk->sndbuf = newsk->sk_sndbuf; csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx; RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk), - sock_net(newsk)-> - ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(newsk)-> + ipv4.sysctl_tcp_window_scaling), tp->window_clamp); neigh_release(n); inet_inherit_port(&tcp_hashinfo, lsk, newsk); @@ -1383,7 +1383,7 @@ static void chtls_pass_accept_request(struct sock *sk, #endif } if (req->tcpopt.wsf <= 14 && - sock_net(sk)->ipv4.sysctl_tcp_window_scaling) { + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { inet_rsk(oreq)->wscale_ok = 1; inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf; } diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index e7b0d7de40fd..c22d945a79fd 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1396,8 +1396,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* alloc_etherdev ensures aligned and zeroed private structures */ dev = alloc_etherdev (sizeof (*tp)); - if (!dev) + if (!dev) { + pci_disable_device(pdev); return -ENOMEM; + } SET_NETDEV_DEV(dev, &pdev->dev); if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) { @@ -1774,6 +1776,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_free_netdev: free_netdev (dev); + pci_disable_device(pdev); return -ENODEV; } diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 649c5c429bd7..1288b5e3d220 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2287,7 +2287,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) /* Uses sync mcc */ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data) + u8 page_num, u32 off, u32 len, u8 *data) { struct be_dma_mem cmd; struct be_mcc_wrb *wrb; @@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, req->port = cpu_to_le32(adapter->hba_port_num); req->page_num = cpu_to_le32(page_num); status = be_mcc_notify_wait(adapter); - if (!status) { + if (!status && len > 0) { struct be_cmd_resp_port_type *resp = cmd.va; - memcpy(data, resp->page_data, PAGE_DATA_LEN); + memcpy(data, resp->page_data + off, len); } err: mutex_unlock(&adapter->mcc_lock); @@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { switch (adapter->phy.interface_type) { case PHY_TYPE_QSFP: @@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { strlcpy(adapter->phy.vendor_name, page_data + SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index c30d6d6f0f3a..9e17d6a7ab8c 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon, int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state); int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data); + u8 page_num, u32 off, u32 len, u8 *data); int be_cmd_query_cable_type(struct be_adapter *adapter); int be_cmd_query_sfp_info(struct be_adapter *adapter); int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 99cc1c46fb30..d90bf457e49c 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1338,7 +1338,7 @@ static int be_get_module_info(struct net_device *netdev, return -EOPNOTSUPP; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { if (!page_data[SFP_PLUS_SFF_8472_COMP]) { modinfo->type = ETH_MODULE_SFF_8079; @@ -1356,25 +1356,32 @@ static int be_get_module_eeprom(struct net_device *netdev, { struct be_adapter *adapter = netdev_priv(netdev); int status; + u32 begin, end; if (!check_privilege(adapter, MAX_PRIVILEGES)) return -EOPNOTSUPP; - status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - data); - if (status) - goto err; + begin = eeprom->offset; + end = eeprom->offset + eeprom->len; - if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) { - status = be_cmd_read_port_transceiver_data(adapter, - TR_PAGE_A2, - data + - PAGE_DATA_LEN); + if (begin < PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin, + min_t(u32, end, PAGE_DATA_LEN) - begin, + data); + if (status) + goto err; + + data += PAGE_DATA_LEN - begin; + begin = PAGE_DATA_LEN; + } + + if (end > PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2, + begin - PAGE_DATA_LEN, + end - begin, data); if (status) goto err; } - if (eeprom->offset) - memcpy(data, data + eeprom->offset, eeprom->len); err: return be_cmd_status(status); } diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 5bc11d1bb9df..969af4dd6405 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1747,6 +1747,19 @@ static int ftgmac100_setup_clk(struct ftgmac100 *priv) return rc; } +static bool ftgmac100_has_child_node(struct device_node *np, const char *name) +{ + struct device_node *child_np = of_get_child_by_name(np, name); + bool ret = false; + + if (child_np) { + ret = true; + of_node_put(child_np); + } + + return ret; +} + static int ftgmac100_probe(struct platform_device *pdev) { struct resource *res; @@ -1860,7 +1873,7 @@ static int ftgmac100_probe(struct platform_device *pdev) /* Display what we found */ phy_attached_info(phy); - } else if (np && !of_get_child_by_name(np, "mdio")) { + } else if (np && !ftgmac100_has_child_node(np, "mdio")) { /* Support legacy ASPEED devicetree descriptions that decribe a * MAC with an embedded MDIO controller but have no "mdio" * child node. Automatically scan the MDIO bus for available @@ -1893,6 +1906,11 @@ static int ftgmac100_probe(struct platform_device *pdev) /* AST2400 doesn't have working HW checksum generation */ if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac"))) netdev->hw_features &= ~NETIF_F_HW_CSUM; + + /* AST2600 tx checksum with NCSI is broken */ + if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac")) + netdev->hw_features &= ~NETIF_F_HW_CSUM; + if (np && of_get_property(np, "no-hw-checksum", NULL)) netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM); netdev->features |= netdev->hw_features; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c index 4e4029d5c8e1..9553d280ec1b 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c @@ -818,7 +818,6 @@ static int api_chain_init(struct hinic_api_cmd_chain *chain, { struct hinic_hwif *hwif = attr->hwif; struct pci_dev *pdev = hwif->pdev; - size_t cell_ctxt_size; chain->hwif = hwif; chain->chain_type = attr->chain_type; @@ -830,8 +829,8 @@ static int api_chain_init(struct hinic_api_cmd_chain *chain, sema_init(&chain->sem, 1); - cell_ctxt_size = chain->num_cells * sizeof(*chain->cell_ctxt); - chain->cell_ctxt = devm_kzalloc(&pdev->dev, cell_ctxt_size, GFP_KERNEL); + chain->cell_ctxt = devm_kcalloc(&pdev->dev, chain->num_cells, + sizeof(*chain->cell_ctxt), GFP_KERNEL); if (!chain->cell_ctxt) return -ENOMEM; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c index 5a6bbee819cd..21b8235952d3 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c @@ -796,11 +796,10 @@ static int init_cmdqs_ctxt(struct hinic_hwdev *hwdev, struct hinic_cmdq_ctxt *cmdq_ctxts; struct pci_dev *pdev = hwif->pdev; struct hinic_pfhwdev *pfhwdev; - size_t cmdq_ctxts_size; int err; - cmdq_ctxts_size = HINIC_MAX_CMDQ_TYPES * sizeof(*cmdq_ctxts); - cmdq_ctxts = devm_kzalloc(&pdev->dev, cmdq_ctxts_size, GFP_KERNEL); + cmdq_ctxts = devm_kcalloc(&pdev->dev, HINIC_MAX_CMDQ_TYPES, + sizeof(*cmdq_ctxts), GFP_KERNEL); if (!cmdq_ctxts) return -ENOMEM; @@ -884,7 +883,6 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif, struct hinic_func_to_io *func_to_io = cmdqs_to_func_to_io(cmdqs); struct pci_dev *pdev = hwif->pdev; struct hinic_hwdev *hwdev; - size_t saved_wqs_size; u16 max_wqe_size; int err; @@ -895,8 +893,8 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif, if (!cmdqs->cmdq_buf_pool) return -ENOMEM; - saved_wqs_size = HINIC_MAX_CMDQ_TYPES * sizeof(struct hinic_wq); - cmdqs->saved_wqs = devm_kzalloc(&pdev->dev, saved_wqs_size, GFP_KERNEL); + cmdqs->saved_wqs = devm_kcalloc(&pdev->dev, HINIC_MAX_CMDQ_TYPES, + sizeof(*cmdqs->saved_wqs), GFP_KERNEL); if (!cmdqs->saved_wqs) { err = -ENOMEM; goto err_saved_wqs; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 0c74f6674634..799b85c88eff 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -162,7 +162,6 @@ static int init_msix(struct hinic_hwdev *hwdev) struct hinic_hwif *hwif = hwdev->hwif; struct pci_dev *pdev = hwif->pdev; int nr_irqs, num_aeqs, num_ceqs; - size_t msix_entries_size; int i, err; num_aeqs = HINIC_HWIF_NUM_AEQS(hwif); @@ -171,8 +170,8 @@ static int init_msix(struct hinic_hwdev *hwdev) if (nr_irqs > HINIC_HWIF_NUM_IRQS(hwif)) nr_irqs = HINIC_HWIF_NUM_IRQS(hwif); - msix_entries_size = nr_irqs * sizeof(*hwdev->msix_entries); - hwdev->msix_entries = devm_kzalloc(&pdev->dev, msix_entries_size, + hwdev->msix_entries = devm_kcalloc(&pdev->dev, nr_irqs, + sizeof(*hwdev->msix_entries), GFP_KERNEL); if (!hwdev->msix_entries) return -ENOMEM; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c index 19942fef99d9..7396158df64f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c @@ -631,16 +631,15 @@ static int alloc_eq_pages(struct hinic_eq *eq) struct hinic_hwif *hwif = eq->hwif; struct pci_dev *pdev = hwif->pdev; u32 init_val, addr, val; - size_t addr_size; int err, pg; - addr_size = eq->num_pages * sizeof(*eq->dma_addr); - eq->dma_addr = devm_kzalloc(&pdev->dev, addr_size, GFP_KERNEL); + eq->dma_addr = devm_kcalloc(&pdev->dev, eq->num_pages, + sizeof(*eq->dma_addr), GFP_KERNEL); if (!eq->dma_addr) return -ENOMEM; - addr_size = eq->num_pages * sizeof(*eq->virt_addr); - eq->virt_addr = devm_kzalloc(&pdev->dev, addr_size, GFP_KERNEL); + eq->virt_addr = devm_kcalloc(&pdev->dev, eq->num_pages, + sizeof(*eq->virt_addr), GFP_KERNEL); if (!eq->virt_addr) { err = -ENOMEM; goto err_virt_addr_alloc; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index 819fa13034c0..027dcc453506 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -647,6 +647,7 @@ int hinic_pf_to_mgmt_init(struct hinic_pf_to_mgmt *pf_to_mgmt, err = alloc_msg_buf(pf_to_mgmt); if (err) { dev_err(&pdev->dev, "Failed to allocate msg buffers\n"); + destroy_workqueue(pf_to_mgmt->workq); hinic_health_reporters_destroy(hwdev->devlink_dev); return err; } @@ -654,6 +655,7 @@ int hinic_pf_to_mgmt_init(struct hinic_pf_to_mgmt *pf_to_mgmt, err = hinic_api_cmd_init(pf_to_mgmt->cmd_chain, hwif); if (err) { dev_err(&pdev->dev, "Failed to initialize cmd chains\n"); + destroy_workqueue(pf_to_mgmt->workq); hinic_health_reporters_destroy(hwdev->devlink_dev); return err; } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c index f04ac00e3e70..f930cd6a75f7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c @@ -192,20 +192,20 @@ static int alloc_page_arrays(struct hinic_wqs *wqs) { struct hinic_hwif *hwif = wqs->hwif; struct pci_dev *pdev = hwif->pdev; - size_t size; - size = wqs->num_pages * sizeof(*wqs->page_paddr); - wqs->page_paddr = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + wqs->page_paddr = devm_kcalloc(&pdev->dev, wqs->num_pages, + sizeof(*wqs->page_paddr), GFP_KERNEL); if (!wqs->page_paddr) return -ENOMEM; - size = wqs->num_pages * sizeof(*wqs->page_vaddr); - wqs->page_vaddr = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + wqs->page_vaddr = devm_kcalloc(&pdev->dev, wqs->num_pages, + sizeof(*wqs->page_vaddr), GFP_KERNEL); if (!wqs->page_vaddr) goto err_page_vaddr; - size = wqs->num_pages * sizeof(*wqs->shadow_page_vaddr); - wqs->shadow_page_vaddr = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + wqs->shadow_page_vaddr = devm_kcalloc(&pdev->dev, wqs->num_pages, + sizeof(*wqs->shadow_page_vaddr), + GFP_KERNEL); if (!wqs->shadow_page_vaddr) goto err_page_shadow_vaddr; @@ -378,15 +378,14 @@ static int alloc_wqes_shadow(struct hinic_wq *wq) { struct hinic_hwif *hwif = wq->hwif; struct pci_dev *pdev = hwif->pdev; - size_t size; - size = wq->num_q_pages * wq->max_wqe_size; - wq->shadow_wqe = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + wq->shadow_wqe = devm_kcalloc(&pdev->dev, wq->num_q_pages, + wq->max_wqe_size, GFP_KERNEL); if (!wq->shadow_wqe) return -ENOMEM; - size = wq->num_q_pages * sizeof(wq->prod_idx); - wq->shadow_idx = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + wq->shadow_idx = devm_kcalloc(&pdev->dev, wq->num_q_pages, + sizeof(*wq->shadow_idx), GFP_KERNEL); if (!wq->shadow_idx) goto err_shadow_idx; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 350225bbe0be..ace949fe6233 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -144,13 +144,12 @@ static int create_txqs(struct hinic_dev *nic_dev) { int err, i, j, num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev); struct net_device *netdev = nic_dev->netdev; - size_t txq_size; if (nic_dev->txqs) return -EINVAL; - txq_size = num_txqs * sizeof(*nic_dev->txqs); - nic_dev->txqs = devm_kzalloc(&netdev->dev, txq_size, GFP_KERNEL); + nic_dev->txqs = devm_kcalloc(&netdev->dev, num_txqs, + sizeof(*nic_dev->txqs), GFP_KERNEL); if (!nic_dev->txqs) return -ENOMEM; @@ -242,13 +241,12 @@ static int create_rxqs(struct hinic_dev *nic_dev) { int err, i, j, num_rxqs = hinic_hwdev_num_qps(nic_dev->hwdev); struct net_device *netdev = nic_dev->netdev; - size_t rxq_size; if (nic_dev->rxqs) return -EINVAL; - rxq_size = num_rxqs * sizeof(*nic_dev->rxqs); - nic_dev->rxqs = devm_kzalloc(&netdev->dev, rxq_size, GFP_KERNEL); + nic_dev->rxqs = devm_kcalloc(&netdev->dev, num_rxqs, + sizeof(*nic_dev->rxqs), GFP_KERNEL); if (!nic_dev->rxqs) return -ENOMEM; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 8da7d46363b2..3828b09bfea3 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -861,7 +861,6 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq, struct hinic_dev *nic_dev = netdev_priv(netdev); struct hinic_hwdev *hwdev = nic_dev->hwdev; int err, irqname_len; - size_t sges_size; txq->netdev = netdev; txq->sq = sq; @@ -870,13 +869,13 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq, txq->max_sges = HINIC_MAX_SQ_BUFDESCS; - sges_size = txq->max_sges * sizeof(*txq->sges); - txq->sges = devm_kzalloc(&netdev->dev, sges_size, GFP_KERNEL); + txq->sges = devm_kcalloc(&netdev->dev, txq->max_sges, + sizeof(*txq->sges), GFP_KERNEL); if (!txq->sges) return -ENOMEM; - sges_size = txq->max_sges * sizeof(*txq->free_sges); - txq->free_sges = devm_kzalloc(&netdev->dev, sges_size, GFP_KERNEL); + txq->free_sges = devm_kcalloc(&netdev->dev, txq->max_sges, + sizeof(*txq->free_sges), GFP_KERNEL); if (!txq->free_sges) { err = -ENOMEM; goto err_alloc_free_sges; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 61fb2a092451..7fe2e47dc83d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5228,6 +5228,15 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) release_sub_crqs(adapter, 0); rc = init_sub_crqs(adapter); } else { + /* no need to reinitialize completely, but we do + * need to clean up transmits that were in flight + * when we processed the reset. Failure to do so + * will confound the upper layer, usually TCP, by + * creating the illusion of transmits that are + * awaiting completion. + */ + clean_tx_pools(adapter); + rc = reset_sub_crq_queues(adapter); } } else { diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index effdc3361266..dd630b6bc74b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -37,6 +37,7 @@ #include #include #include +#include #include "i40e_type.h" #include "i40e_prototype.h" #include @@ -991,6 +992,21 @@ static inline void i40e_write_fd_input_set(struct i40e_pf *pf, (u32)(val & 0xFFFFFFFFULL)); } +/** + * i40e_get_pf_count - get PCI PF count. + * @hw: pointer to a hw. + * + * Reports the function number of the highest PCI physical + * function plus 1 as it is loaded from the NVM. + * + * Return: PCI PF count. + **/ +static inline u32 i40e_get_pf_count(struct i40e_hw *hw) +{ + return FIELD_GET(I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK, + rd32(hw, I40E_GLGEN_PCIFCNCNT)); +} + /* needed by i40e_ethtool.c */ int i40e_up(struct i40e_vsi *vsi); void i40e_down(struct i40e_vsi *vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a2bdb2906519..63054061966e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2582,15 +2582,16 @@ static void i40e_diag_test(struct net_device *netdev, set_bit(__I40E_TESTING, pf->state); + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) { + dev_warn(&pf->pdev->dev, + "Cannot start offline testing when PF is in reset state.\n"); + goto skip_ol_tests; + } + if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) { dev_warn(&pf->pdev->dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); - data[I40E_ETH_TEST_REG] = 1; - data[I40E_ETH_TEST_EEPROM] = 1; - data[I40E_ETH_TEST_INTR] = 1; - data[I40E_ETH_TEST_LINK] = 1; - eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__I40E_TESTING, pf->state); goto skip_ol_tests; } @@ -2637,9 +2638,17 @@ static void i40e_diag_test(struct net_device *netdev, data[I40E_ETH_TEST_INTR] = 0; } -skip_ol_tests: - netif_info(pf, drv, netdev, "testing finished\n"); + return; + +skip_ol_tests: + data[I40E_ETH_TEST_REG] = 1; + data[I40E_ETH_TEST_EEPROM] = 1; + data[I40E_ETH_TEST_INTR] = 1; + data[I40E_ETH_TEST_LINK] = 1; + eth_test->flags |= ETH_TEST_FL_FAILED; + clear_bit(__I40E_TESTING, pf->state); + netif_info(pf, drv, netdev, "testing failed\n"); } static void i40e_get_wol(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4a18a7c7dd4c..1dad62ecb8a3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -548,6 +548,47 @@ void i40e_pf_reset_stats(struct i40e_pf *pf) pf->hw_csum_rx_error = 0; } +/** + * i40e_compute_pci_to_hw_id - compute index form PCI function. + * @vsi: ptr to the VSI to read from. + * @hw: ptr to the hardware info. + **/ +static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw) +{ + int pf_count = i40e_get_pf_count(hw); + + if (vsi->type == I40E_VSI_SRIOV) + return (hw->port * BIT(7)) / pf_count + vsi->vf_id; + + return hw->port + BIT(7); +} + +/** + * i40e_stat_update64 - read and update a 64 bit stat from the chip. + * @hw: ptr to the hardware info. + * @hireg: the high 32 bit reg to read. + * @loreg: the low 32 bit reg to read. + * @offset_loaded: has the initial offset been loaded yet. + * @offset: ptr to current offset value. + * @stat: ptr to the stat. + * + * Since the device stats are not reset at PFReset, they will not + * be zeroed when the driver starts. We'll save the first values read + * and use them as offsets to be subtracted from the raw values in order + * to report stats that count from zero. + **/ +static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg, + bool offset_loaded, u64 *offset, u64 *stat) +{ + u64 new_data; + + new_data = rd64(hw, loreg); + + if (!offset_loaded || new_data < *offset) + *offset = new_data; + *stat = new_data - *offset; +} + /** * i40e_stat_update48 - read and update a 48 bit stat from the chip * @hw: ptr to the hardware info @@ -619,6 +660,34 @@ static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat) *stat += new_data; } +/** + * i40e_stats_update_rx_discards - update rx_discards. + * @vsi: ptr to the VSI to be updated. + * @hw: ptr to the hardware info. + * @stat_idx: VSI's stat_counter_idx. + * @offset_loaded: ptr to the VSI's stat_offsets_loaded. + * @stat_offset: ptr to stat_offset to store first read of specific register. + * @stat: ptr to VSI's stat to be updated. + **/ +static void +i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw, + int stat_idx, bool offset_loaded, + struct i40e_eth_stats *stat_offset, + struct i40e_eth_stats *stat) +{ + u64 rx_rdpc, rx_rxerr; + + i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded, + &stat_offset->rx_discards, &rx_rdpc); + i40e_stat_update64(hw, + I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)), + I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)), + offset_loaded, &stat_offset->rx_discards_other, + &rx_rxerr); + + stat->rx_discards = rx_rdpc + rx_rxerr; +} + /** * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters. * @vsi: the VSI to be updated @@ -678,6 +747,10 @@ void i40e_update_eth_stats(struct i40e_vsi *vsi) I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); + + i40e_stats_update_rx_discards(vsi, hw, stat_idx, + vsi->stat_offsets_loaded, oes, es); + vsi->stat_offsets_loaded = true; } @@ -1834,11 +1907,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, * non-zero req_queue_pairs says that user requested a new * queue count via ethtool's set_channels, so use this * value for queues distribution across traffic classes + * We need at least one queue pair for the interface + * to be usable as we see in else statement. */ if (vsi->req_queue_pairs > 0) vsi->num_queue_pairs = vsi->req_queue_pairs; else if (pf->flags & I40E_FLAG_MSIX_ENABLED) vsi->num_queue_pairs = pf->num_lan_msix; + else + vsi->num_queue_pairs = 1; } /* Number of queues per enabled TC */ @@ -8163,6 +8240,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, return -EOPNOTSUPP; } + if (!tc) { + dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination"); + return -EINVAL; + } + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) return -EBUSY; @@ -10109,7 +10191,7 @@ static int i40e_reset(struct i40e_pf *pf) **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { - int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state); + const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; i40e_status ret; @@ -10117,13 +10199,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) int v; if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && - i40e_check_recovery_mode(pf)) { + is_recovery_mode_reported) i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev); - } if (test_bit(__I40E_DOWN, pf->state) && - !test_bit(__I40E_RECOVERY_MODE, pf->state) && - !old_recovery_mode_bit) + !test_bit(__I40E_RECOVERY_MODE, pf->state)) goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); @@ -10150,13 +10230,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * accordingly with regard to resources initialization * and deinitialization */ - if (test_bit(__I40E_RECOVERY_MODE, pf->state) || - old_recovery_mode_bit) { + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { if (i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities)) goto end_unlock; - if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { + if (is_recovery_mode_reported) { /* we're staying in recovery mode so we'll reinitialize * misc vector here */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 8335f151ceef..117fd9674d7f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -77,6 +77,11 @@ #define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0 #define I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT 16 #define I40E_GLGEN_MSRWD_MDIRDDATA_MASK I40E_MASK(0xFFFF, I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT) +#define I40E_GLGEN_PCIFCNCNT 0x001C0AB4 /* Reset: PCIR */ +#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT 0 +#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK I40E_MASK(0x1F, I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT) +#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT 16 +#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_MASK I40E_MASK(0xFF, I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT) #define I40E_GLGEN_RSTAT 0x000B8188 /* Reset: POR */ #define I40E_GLGEN_RSTAT_DEVSTATE_SHIFT 0 #define I40E_GLGEN_RSTAT_DEVSTATE_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_DEVSTATE_SHIFT) @@ -461,6 +466,14 @@ #define I40E_VFQF_HKEY1_MAX_INDEX 12 #define I40E_VFQF_HLUT1(_i, _VF) (0x00220000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */ /* Reset: CORER */ #define I40E_VFQF_HLUT1_MAX_INDEX 15 +#define I40E_GL_RXERR1H(_i) (0x00318004 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */ +#define I40E_GL_RXERR1H_MAX_INDEX 143 +#define I40E_GL_RXERR1H_RXERR1H_SHIFT 0 +#define I40E_GL_RXERR1H_RXERR1H_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1H_RXERR1H_SHIFT) +#define I40E_GL_RXERR1L(_i) (0x00318000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */ +#define I40E_GL_RXERR1L_MAX_INDEX 143 +#define I40E_GL_RXERR1L_RXERR1L_SHIFT 0 +#define I40E_GL_RXERR1L_RXERR1L_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1L_RXERR1L_SHIFT) #define I40E_GLPRT_BPRCH(_i) (0x003005E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */ #define I40E_GLPRT_BPRCL(_i) (0x003005E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */ #define I40E_GLPRT_BPTCH(_i) (0x00300A04 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index add67f7b73e8..446672a7e39f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -1172,6 +1172,7 @@ struct i40e_eth_stats { u64 tx_broadcast; /* bptc */ u64 tx_discards; /* tdpc */ u64 tx_errors; /* tepc */ + u64 rx_discards_other; /* rxerr1 */ }; /* Statistics collected per VEB per TC */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 9181e007e039..1947c5a77550 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2228,7 +2228,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) } if (vf->adq_enabled) { - for (i = 0; i < I40E_MAX_VF_VSI; i++) + for (i = 0; i < vf->num_tc; i++) num_qps_all += vf->ch[i].num_qps; if (num_qps_all != qci->num_queue_pairs) { aq_ret = I40E_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 256fa07d54d5..99983f7a0ce0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1263,11 +1263,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, { struct iavf_rx_buffer *rx_buffer; - if (!size) - return NULL; - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; prefetchw(rx_buffer->page); + if (!size) + return rx_buffer; /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 421fc707f80a..7f1bf71844bc 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -652,7 +652,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring) rx_desc = ICE_RX_DESC(rx_ring, i); if (!(rx_desc->wb.status_error0 & - cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS))) + (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) | + cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))) continue; rx_buf = &rx_ring->rx_buf[i]; @@ -2174,6 +2175,42 @@ ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks, return err; } +/** + * ice_set_phy_type_from_speed - set phy_types based on speeds + * and advertised modes + * @ks: ethtool link ksettings struct + * @phy_type_low: pointer to the lower part of phy_type + * @phy_type_high: pointer to the higher part of phy_type + * @adv_link_speed: targeted link speeds bitmap + */ +static void +ice_set_phy_type_from_speed(const struct ethtool_link_ksettings *ks, + u64 *phy_type_low, u64 *phy_type_high, + u16 adv_link_speed) +{ + /* Handle 1000M speed in a special way because ice_update_phy_type + * enables all link modes, but having mixed copper and optical + * standards is not supported. + */ + adv_link_speed &= ~ICE_AQ_LINK_SPEED_1000MB; + + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseT_Full)) + *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_T | + ICE_PHY_TYPE_LOW_1G_SGMII; + + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseKX_Full)) + *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_KX; + + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseX_Full)) + *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_SX | + ICE_PHY_TYPE_LOW_1000BASE_LX; + + ice_update_phy_type(phy_type_low, phy_type_high, adv_link_speed); +} + /** * ice_set_link_ksettings - Set Speed and Duplex * @netdev: network interface device structure @@ -2310,7 +2347,8 @@ ice_set_link_ksettings(struct net_device *netdev, adv_link_speed = curr_link_speed; /* Convert the advertise link speeds to their corresponded PHY_TYPE */ - ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed); + ice_set_phy_type_from_speed(ks, &phy_type_low, &phy_type_high, + adv_link_speed); if (!autoneg_changed && adv_link_speed == curr_link_speed) { netdev_info(netdev, "Nothing changed, exiting without setting anything.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index eb0625b52e45..810f2bdb9164 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5203,10 +5203,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi) if (vsi->netdev) { ice_set_rx_mode(vsi->netdev); - err = ice_vsi_vlan_setup(vsi); + if (vsi->type != ICE_VSI_LB) { + err = ice_vsi_vlan_setup(vsi); - if (err) - return err; + if (err) + return err; + } } ice_vsi_cfg_dcb_rings(vsi); @@ -5271,9 +5273,10 @@ static int ice_up_complete(struct ice_vsi *vsi) netif_carrier_on(vsi->netdev); } - /* clear this now, and the first stats read will be used as baseline */ - vsi->stat_offsets_loaded = false; - + /* Perform an initial read of the statistics registers now to + * set the baseline so counters are ready when interface is up + */ + ice_update_eth_stats(vsi); ice_service_task_schedule(pf); return 0; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index f854d41c6c94..4e51f4bb58ff 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4813,8 +4813,11 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring) while (i != tx_ring->next_to_use) { union e1000_adv_tx_desc *eop_desc, *tx_desc; - /* Free all the Tx ring sk_buffs */ - dev_kfree_skb_any(tx_buffer->skb); + /* Free all the Tx ring sk_buffs or xdp frames */ + if (tx_buffer->type == IGB_TYPE_SKB) + dev_kfree_skb_any(tx_buffer->skb); + else + xdp_return_frame(tx_buffer->xdpf); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -5499,7 +5502,8 @@ static void igb_watchdog_task(struct work_struct *work) break; } - if (adapter->link_speed != SPEED_1000) + if (adapter->link_speed != SPEED_1000 || + !hw->phy.ops.read_reg) goto no_wait; /* wait for Remote receiver status OK */ @@ -9825,11 +9829,10 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) struct e1000_hw *hw = &adapter->hw; u32 dmac_thr; u16 hwm; + u32 reg; if (hw->mac.type > e1000_82580) { if (adapter->flags & IGB_FLAG_DMAC) { - u32 reg; - /* force threshold to 0. */ wr32(E1000_DMCTXTH, 0); @@ -9862,7 +9865,6 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* Disable BMC-to-OS Watchdog Enable */ if (hw->mac.type != e1000_i354) reg &= ~E1000_DMACR_DC_BMC2OSW_EN; - wr32(E1000_DMACR, reg); /* no lower threshold to disable @@ -9879,12 +9881,12 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) */ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); + } - /* make low power state decision controlled - * by DMA coal - */ + if (hw->mac.type >= e1000_i210 || + (adapter->flags & IGB_FLAG_DMAC)) { reg = rd32(E1000_PCIEMISC); - reg &= ~E1000_PCIEMISC_LX_DECISION; + reg |= E1000_PCIEMISC_LX_DECISION; wr32(E1000_PCIEMISC, reg); } /* endif adapter->dmac is not disabled */ } else if (hw->mac.type == e1000_82580) { diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index fd37d2c203af..7f3523f0d196 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -187,15 +187,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) igc_check_for_copper_link(hw); - /* Verify phy id and set remaining function pointers */ - switch (phy->id) { - case I225_I_PHY_ID: - phy->type = igc_phy_i225; - break; - default: - ret_val = -IGC_ERR_PHY; - goto out; - } + phy->type = igc_phy_i225; out: return ret_val; diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 55dae7c4703f..7e29f41f70e0 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -22,6 +22,7 @@ #define IGC_DEV_ID_I220_V 0x15F7 #define IGC_DEV_ID_I225_K 0x3100 #define IGC_DEV_ID_I225_K2 0x3101 +#define IGC_DEV_ID_I226_K 0x3102 #define IGC_DEV_ID_I225_LMVP 0x5502 #define IGC_DEV_ID_I225_IT 0x0D9F #define IGC_DEV_ID_I226_LM 0x125B diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 61cebb7df6bc..e7ffe63925fd 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -4177,20 +4178,12 @@ bool igc_has_link(struct igc_adapter *adapter) * false until the igc_check_for_link establishes link * for copper adapters ONLY */ - switch (hw->phy.media_type) { - case igc_media_type_copper: - if (!hw->mac.get_link_status) - return true; - hw->mac.ops.check_for_link(hw); - link_active = !hw->mac.get_link_status; - break; - default: - case igc_media_type_unknown: - break; - } + if (!hw->mac.get_link_status) + return true; + hw->mac.ops.check_for_link(hw); + link_active = !hw->mac.get_link_status; - if (hw->mac.type == igc_i225 && - hw->phy.id == I225_I_PHY_ID) { + if (hw->mac.type == igc_i225) { if (!netif_carrier_ok(adapter->netdev)) { adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { @@ -4940,6 +4933,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; + if (IGC_REMOVED(hw_addr)) + return ~value; + value = readl(&hw_addr[reg]); /* reads should not return all F's */ @@ -5049,6 +5045,10 @@ static int igc_probe(struct pci_dev *pdev, pci_enable_pcie_error_reporting(pdev); + err = pci_enable_ptm(pdev, NULL); + if (err < 0) + dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n"); + pci_set_master(pdev); err = -ENOMEM; diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 8de4de2e5636..3a103406eadb 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -249,8 +249,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) return ret_val; } - if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && - hw->phy.id == I225_I_PHY_ID) { + if (phy->autoneg_mask & ADVERTISE_2500_FULL) { /* Read the MULTI GBT AN Control Register - reg 7.32 */ ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | @@ -390,8 +389,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw) ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); - if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && - hw->phy.id == I225_I_PHY_ID) + if (phy->autoneg_mask & ADVERTISE_2500_FULL) ret_val = phy->ops.write_reg(hw, (STANDARD_AN_REG_MASK << MMD_DEVADDR_SHIFT) | diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index b52dd9d737e8..a273e1c33b3f 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -252,7 +252,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg); #define wr32(reg, val) \ do { \ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ - writel((val), &hw_addr[(reg)]); \ + if (!IGC_REMOVED(hw_addr)) \ + writel((val), &hw_addr[(reg)]); \ } while (0) #define rd32(reg) (igc_rd32(hw, reg)) @@ -264,4 +265,6 @@ do { \ #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2))) +#define IGC_REMOVED(h) unlikely(!(h)) + #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index de0fc6ecf491..27c6f911737b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -769,6 +769,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_IPSEC struct ixgbe_ipsec *ipsec; #endif /* CONFIG_IXGBE_IPSEC */ + spinlock_t vfs_lock; }; static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a3a02e2f92f6..b5b8be4672aa 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6403,6 +6403,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, /* n-tuple support exists, always init our spinlock */ spin_lock_init(&adapter->fdir_perfect_lock); + /* init spinlock to avoid concurrency of VF resources */ + spin_lock_init(&adapter->vfs_lock); + #ifdef CONFIG_IXGBE_DCB ixgbe_init_dcb(adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 214a38de3f41..0078ae592616 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -204,10 +204,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs) int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { unsigned int num_vfs = adapter->num_vfs, vf; + unsigned long flags; int rss; + spin_lock_irqsave(&adapter->vfs_lock, flags); /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; + spin_unlock_irqrestore(&adapter->vfs_lock, flags); /* put the reference to all of the vf devices */ for (vf = 0; vf < num_vfs; ++vf) { @@ -1157,9 +1160,9 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, switch (xcast_mode) { case IXGBEVF_XCAST_MODE_NONE: - disable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE | + disable = IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE; - enable = 0; + enable = IXGBE_VMOLR_BAM; break; case IXGBEVF_XCAST_MODE_MULTI: disable = IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE; @@ -1181,9 +1184,9 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, return -EPERM; } - disable = 0; + disable = IXGBE_VMOLR_VPE; enable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE | - IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE; + IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE; break; default: return -EOPNOTSUPP; @@ -1305,8 +1308,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf) void ixgbe_msg_task(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + unsigned long flags; u32 vf; + spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->num_vfs; vf++) { /* process any reset requests */ if (!ixgbe_check_for_rst(hw, vf)) @@ -1320,6 +1325,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter) if (!ixgbe_check_for_ack(hw, vf)) ixgbe_rcv_ack_from_vf(adapter, vf); } + spin_unlock_irqrestore(&adapter->vfs_lock, flags); } void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7d7dc0754a3a..c7aff89141e1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -806,6 +806,17 @@ static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd, rxd->rxd4 = READ_ONCE(dma_rxd->rxd4); } +static void *mtk_max_lro_buf_alloc(gfp_t gfp_mask) +{ + unsigned int size = mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH); + unsigned long data; + + data = __get_free_pages(gfp_mask | __GFP_COMP | __GFP_NOWARN, + get_order(size)); + + return (void *)data; +} + /* the qdma core needs scratch memory to be setup */ static int mtk_init_fq_dma(struct mtk_eth *eth) { @@ -1303,7 +1314,10 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, goto release_desc; /* alloc new buffer */ - new_data = napi_alloc_frag(ring->frag_size); + if (ring->frag_size <= PAGE_SIZE) + new_data = napi_alloc_frag(ring->frag_size); + else + new_data = mtk_max_lro_buf_alloc(GFP_ATOMIC); if (unlikely(!new_data)) { netdev->stats.rx_dropped++; goto release_desc; @@ -1700,7 +1714,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) return -ENOMEM; for (i = 0; i < rx_dma_size; i++) { - ring->data[i] = netdev_alloc_frag(ring->frag_size); + if (ring->frag_size <= PAGE_SIZE) + ring->data[i] = netdev_alloc_frag(ring->frag_size); + else + ring->data[i] = mtk_max_lro_buf_alloc(GFP_KERNEL); if (!ring->data[i]) return -ENOMEM; } @@ -1966,6 +1983,9 @@ static int mtk_hwlro_get_fdir_entry(struct net_device *dev, struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + if (fsp->location >= ARRAY_SIZE(mac->hwlro_ip)) + return -EINVAL; + /* only tcp dst ipv4 is meaningful, others are meaningless */ fsp->flow_type = TCP_V4_FLOW; fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 01275c376721..962851000ace 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -2099,7 +2099,7 @@ static int mlx4_en_get_module_eeprom(struct net_device *dev, en_err(priv, "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n", i, offset, ee->len - i, ret); - return 0; + return ret; } i += ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 857be86b4a11..e8a4adccd2b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -675,6 +675,9 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) if (!tracer->owner) return; + if (unlikely(!tracer->str_db.loaded)) + goto arm; + block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE; start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; @@ -732,6 +735,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) &tmp_trace_block[TRACES_PER_BLOCK - 1]); } +arm: mlx5_fw_tracer_arm(dev); } @@ -1138,8 +1142,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void queue_work(tracer->work_queue, &tracer->ownership_change_work); break; case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE: - if (likely(tracer->str_db.loaded)) - queue_work(tracer->work_queue, &tracer->handle_traces_work); + queue_work(tracer->work_queue, &tracer->handle_traces_work); break; default: mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index d06532d0baa4..634777fd7db9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -231,8 +231,7 @@ mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_rx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) > - TLS_OFFLOAD_CONTEXT_SIZE_RX); + BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX); *ctx = priv_rx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index b140e13fdcc8..679747db3110 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -63,8 +63,7 @@ mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_tx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); + BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX); *ctx = priv_tx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 16e98ac47624..cfc3bfcb04a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4009,6 +4009,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } + if (params->xdp_prog) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with XDP\n"); + features &= ~NETIF_F_LRO; + } + } + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { features &= ~NETIF_F_RXHASH; if (netdev->features & NETIF_F_RXHASH) @@ -4569,6 +4576,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) unlock: mutex_unlock(&priv->state_lock); + + /* Need to fix some features. */ + if (!err) + netdev_update_features(netdev); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 78f6a6f0a7e0..ff4f10d0f090 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -536,7 +536,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; struct mlx5_core_dev *mdev = priv->mdev; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + if (!mlx5e_stats_grp_vnic_env_num_stats(priv)) return; MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 55772f0cbbf8..4bdcceffe9d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1520,9 +1520,22 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, return NULL; } -static bool check_conflicting_actions(u32 action1, u32 action2) +static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0, + const struct mlx5_fs_vlan *vlan1) { - u32 xored_actions = action1 ^ action2; + return vlan0->ethtype != vlan1->ethtype || + vlan0->vid != vlan1->vid || + vlan0->prio != vlan1->prio; +} + +static bool check_conflicting_actions(const struct mlx5_flow_act *act1, + const struct mlx5_flow_act *act2) +{ + u32 action1 = act1->action; + u32 action2 = act2->action; + u32 xored_actions; + + xored_actions = action1 ^ action2; /* if one rule only wants to count, it's ok */ if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || @@ -1539,6 +1552,22 @@ static bool check_conflicting_actions(u32 action1, u32 action2) MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) return true; + if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && + act1->pkt_reformat != act2->pkt_reformat) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + act1->modify_hdr != act2->modify_hdr) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0])) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 && + check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1])) + return true; + return false; } @@ -1546,7 +1575,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_context *flow_context, const struct mlx5_flow_act *flow_act) { - if (check_conflicting_actions(flow_act->action, fte->action.action)) { + if (check_conflicting_actions(flow_act, &fte->action)) { mlx5_core_warn(get_dev(&fte->node), "Found two FTEs with conflicting actions\n"); return -EEXIST; @@ -2024,16 +2053,16 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) down_write_ref_node(&fte->node, false); for (i = handle->num_rules - 1; i >= 0; i--) tree_remove_node(&handle->rule[i]->node, true); - if (fte->dests_size) { - if (fte->modify_mask) - modify_fte(fte); - up_write_ref_node(&fte->node, false); - } else if (list_empty(&fte->node.children)) { + if (list_empty(&fte->node.children)) { del_hw_fte(&fte->node); /* Avoid double call to del_hw_fte */ fte->node.del_hw_func = NULL; up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); + } else if (fte->dests_size) { + if (fte->modify_mask) + modify_fte(fte); + up_write_ref_node(&fte->node, false); } else { up_write_ref_node(&fte->node, false); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 11cc3ea5010a..9fb3e5ec1da6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -274,7 +274,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - struct lag_tracker tracker; + struct lag_tracker tracker = { }; bool do_bond, roce_lag; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 96c39a17d026..b227fa9ada46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -43,11 +43,10 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns, err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action); if (err && action) { err = mlx5dr_action_destroy(action); - if (err) { - action = NULL; - mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", - err); - } + if (err) + mlx5_core_err(ns->dev, + "Failed to destroy action (%d)\n", err); + action = NULL; } ft->fs_dr_table.miss_action = action; if (old_miss_action) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h index a68d931090dd..15c8d4de8350 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h @@ -8,8 +8,8 @@ #include "spectrum.h" enum mlxsw_sp_counter_sub_pool_id { - MLXSW_SP_COUNTER_SUB_POOL_FLOW, MLXSW_SP_COUNTER_SUB_POOL_RIF, + MLXSW_SP_COUNTER_SUB_POOL_FLOW, }; int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 5f92b1691360..aff6d4f35cd2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -168,8 +168,6 @@ static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev, static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev, struct dcb_app *app) { - int prio; - if (app->priority >= IEEE_8021QAZ_MAX_TCS) { netdev_err(dev, "APP entry with priority value %u is invalid\n", app->priority); @@ -183,17 +181,6 @@ static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev, app->protocol); return -EINVAL; } - - /* Warn about any DSCP APP entries with the same PID. */ - prio = fls(dcb_ieee_getapp_mask(dev, app)); - if (prio--) { - if (prio < app->priority) - netdev_warn(dev, "Choosing priority %d for DSCP %d in favor of previously-active value of %d\n", - app->priority, app->protocol, prio); - else if (prio > app->priority) - netdev_warn(dev, "Ignoring new priority %d for DSCP %d in favor of current value of %d\n", - app->priority, app->protocol, prio); - } break; case IEEE_8021QAZ_APP_SEL_ETHERTYPE: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 53128382fc2e..d2887ae508bb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4003,7 +4003,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, { const struct fib_nh *nh = fib_info_nh(fi, 0); - return nh->fib_nh_scope == RT_SCOPE_LINK || + return nh->fib_nh_gw_family || mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } @@ -8038,13 +8038,14 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { struct net *net = mlxsw_sp_net(mlxsw_sp); - bool usp = net->ipv4.sysctl_ip_fwd_update_priority; char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; + bool usp; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) return -EIO; max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority); mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 433f14ade464..02ba6aa01105 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -709,7 +709,7 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = { .trap = MLXSW_SP_TRAP_CONTROL(LLDP, LLDP, TRAP), .listeners_arr = { MLXSW_RXL(mlxsw_sp_rx_ptp_listener, LLDP, TRAP_TO_CPU, - false, SP_LLDP, DISCARD), + true, SP_LLDP, DISCARD), }, }, { diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index a06466ecca12..a55861ea4206 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1593,8 +1593,12 @@ int ocelot_init(struct ocelot *ocelot) ocelot_write_rix(ocelot, ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), ANA_PGID_PGID, PGID_MC); - ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV4); - ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV6); + ocelot_write_rix(ocelot, + ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), + ANA_PGID_PGID, PGID_MCIPV4); + ocelot_write_rix(ocelot, + ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), + ANA_PGID_PGID, PGID_MCIPV6); /* Allow manual injection via DEVCPU_QS registers, and byte swap these * registers endianness. diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index cd0c9623f7dd..e0b801d10739 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -286,8 +286,6 @@ nfp_net_get_link_ksettings(struct net_device *netdev, /* Init to unknowns */ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); - ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; cmd->base.duplex = DUPLEX_UNKNOWN; @@ -295,6 +293,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev, port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); if (eth_port) { + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ? AUTONEG_ENABLE : AUTONEG_DISABLE; nfp_net_set_fec_link_mode(eth_port, cmd); diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index c9f32fc50254..2219e4c59ae6 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3628,7 +3628,8 @@ static void ql_reset_work(struct work_struct *work) qdev->mem_map_registers; unsigned long hw_flags; - if (test_bit((QL_RESET_PER_SCSI | QL_RESET_START), &qdev->flags)) { + if (test_bit(QL_RESET_PER_SCSI, &qdev->flags) || + test_bit(QL_RESET_START, &qdev->flags)) { clear_bit(QL_LINK_MASTER, &qdev->flags); /* diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5eac3f494d9e..c025dadcce28 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4183,7 +4183,6 @@ static void rtl8169_tso_csum_v1(struct sk_buff *skb, u32 *opts) static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, struct sk_buff *skb, u32 *opts) { - u32 transport_offset = (u32)skb_transport_offset(skb); struct skb_shared_info *shinfo = skb_shinfo(skb); u32 mss = shinfo->gso_size; @@ -4200,7 +4199,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, WARN_ON_ONCE(1); } - opts[0] |= transport_offset << GTTCPHO_SHIFT; + opts[0] |= skb_transport_offset(skb) << GTTCPHO_SHIFT; opts[1] |= mss << TD1_MSS_SHIFT; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 ip_protocol; @@ -4228,7 +4227,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, else WARN_ON_ONCE(1); - opts[1] |= transport_offset << TCPHO_SHIFT; + opts[1] |= skb_transport_offset(skb) << TCPHO_SHIFT; } else { unsigned int padto = rtl_quirk_packet_padto(tp, skb); @@ -4401,14 +4400,13 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { - int transport_offset = skb_transport_offset(skb); struct rtl8169_private *tp = netdev_priv(dev); if (skb_is_gso(skb)) { if (tp->mac_version == RTL_GIGA_MAC_VER_34) features = rtl8168evl_fix_tso(skb, features); - if (transport_offset > GTTCPHO_MAX && + if (skb_transport_offset(skb) > GTTCPHO_MAX && rtl_chip_supports_csum_v2(tp)) features &= ~NETIF_F_ALL_TSO; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -4419,7 +4417,7 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb, if (rtl_quirk_packet_padto(tp, skb)) features &= ~NETIF_F_CSUM_MASK; - if (transport_offset > TCPHO_MAX && + if (skb_transport_offset(skb) > TCPHO_MAX && rtl_chip_supports_csum_v2(tp)) features &= ~NETIF_F_CSUM_MASK; } diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 6f950979d25e..5b7413305be6 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1916,7 +1916,10 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx) efx_update_sw_stats(efx, stats); out: + /* releasing a DMA coherent buffer with BH disabled can panic */ + spin_unlock_bh(&efx->stats_lock); efx_nic_free_buffer(efx, &stats_buf); + spin_lock_bh(&efx->stats_lock); return rc; } @@ -2240,7 +2243,7 @@ int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb, * guaranteed to satisfy the second as we only attempt TSO if * inner_network_header <= 208. */ - ip_tot_len = -EFX_TSO2_MAX_HDRLEN; + ip_tot_len = 0x10000 - EFX_TSO2_MAX_HDRLEN; EFX_WARN_ON_ONCE_PARANOID(mss + EFX_TSO2_MAX_HDRLEN + (tcp->doff << 2u) > ip_tot_len); diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 84041cd587d7..b44acb6e3953 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -411,8 +411,9 @@ static int efx_ef10_pci_sriov_enable(struct efx_nic *efx, int num_vfs) static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) { struct pci_dev *dev = efx->pci_dev; + struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int vfs_assigned = pci_vfs_assigned(dev); - int rc = 0; + int i, rc = 0; if (vfs_assigned && !force) { netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; " @@ -420,10 +421,13 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) return -EBUSY; } - if (!vfs_assigned) + if (!vfs_assigned) { + for (i = 0; i < efx->vf_count; i++) + nic_data->vf[i].pci_dev = NULL; pci_disable_sriov(dev); - else + } else { rc = -EBUSY; + } efx_ef10_sriov_free_vf_vswitching(efx); efx->vf_count = 0; diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 2ab8571ef1cc..d0f1b2dc7dff 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -287,6 +287,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1; efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->tx_channel_offset = 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; rc = pci_enable_msi(efx->pci_dev); @@ -307,6 +308,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->tx_channel_offset = 1; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; efx->legacy_irq = efx->pci_dev->irq; @@ -858,10 +860,6 @@ int efx_set_channels(struct efx_nic *efx) int xdp_queue_number; int rc; - efx->tx_channel_offset = - efx_separate_tx_channels ? - efx->n_channels - efx->n_tx_channels : 0; - if (efx->xdp_tx_queue_count) { EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 9f7dfdf708cf..8aecb4bd2c0d 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1522,7 +1522,7 @@ static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel) static inline bool efx_channel_has_tx_queues(struct efx_channel *channel) { - return true; + return channel && channel->channel >= channel->efx->tx_channel_offset; } static inline unsigned int efx_channel_num_tx_queues(struct efx_channel *channel) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 725b0f38813a..a2b4e3befa59 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type); if (tx_queue && tx_queue->timestamping) { + /* This code invokes normal driver TX code which is always + * protected from softirqs when called from generic TX code, + * which in turn disables preemption. Look at __dev_queue_xmit + * which uses rcu_read_lock_bh disabling preemption for RCU + * plus disabling softirqs. We do not need RCU reader + * protection here. + * + * Although it is theoretically safe for current PTP TX/RX code + * running without disabling softirqs, there are three good + * reasond for doing so: + * + * 1) The code invoked is mainly implemented for non-PTP + * packets and it is always executed with softirqs + * disabled. + * 2) This being a single PTP packet, better to not + * interrupt its processing by softirqs which can lead + * to high latencies. + * 3) netdev_xmit_more checks preemption is disabled and + * triggers a BUG_ON if not. + */ + local_bh_disable(); efx_enqueue_skb(tx_queue, skb); + local_bh_enable(); } else { WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); dev_kfree_skb_any(skb); diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 51cd7dca91cd..3a7d2baec6c1 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -1513,14 +1513,14 @@ static void epic_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct epic_private *ep = netdev_priv(dev); + unregister_netdev(dev); dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma); dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma); - unregister_netdev(dev); pci_iounmap(pdev, ep->ioaddr); - pci_release_regions(pdev); free_netdev(dev); + pci_release_regions(pdev); pci_disable_device(pdev); /* pci_power_off(pdev, -1); */ } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 2342d497348e..fd1b0cc6b5fa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -363,6 +363,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; + data->sph_disable = 1; err = tegra_eqos_init(pdev, eqos); if (err < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 16c538cfaf59..2e71e510e127 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -215,6 +215,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan) if (queue == 0 || queue == 4) { value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK; value |= MTL_RXQ_DMA_Q04MDMACH(chan); + } else if (queue > 4) { + value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4); + value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4); } else { value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue); value |= MTL_RXQ_DMA_QXMDMACH(chan, queue); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e9aa9a5eba6b..27b7bb64a028 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -738,19 +738,10 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) struct timespec64 now; u32 sec_inc = 0; u64 temp = 0; - int ret; if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) return -EOPNOTSUPP; - ret = clk_prepare_enable(priv->plat->clk_ptp_ref); - if (ret < 0) { - netdev_warn(priv->dev, - "failed to enable PTP reference clock: %pe\n", - ERR_PTR(ret)); - return ret; - } - stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags); priv->systime_flags = systime_flags; @@ -2755,6 +2746,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) stmmac_mmc_setup(priv); + if (ptp_register) { + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + } + ret = stmmac_init_ptp(priv); if (ret == -EOPNOTSUPP) netdev_warn(priv->dev, "PTP not supported by HW\n"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 272cb47af9f2..a7a1227c9b92 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -175,7 +175,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -227,8 +227,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev) pcim_iounmap_regions(pdev, BIT(i)); break; } - - pci_disable_device(pdev); } static int __maybe_unused stmmac_pci_suspend(struct device *dev) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index b40b962055fa..f70d8d1ce329 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -814,7 +814,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) if (ret) return ret; - stmmac_init_tstamp_counter(priv, priv->systime_flags); + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) { + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + return ret; + } } return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index 0462dcc93e53..dd5c4ef92ef3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -1084,8 +1084,9 @@ static int stmmac_test_rxp(struct stmmac_priv *priv) unsigned char addr[ETH_ALEN] = {0xde, 0xad, 0xbe, 0xef, 0x00, 0x00}; struct tc_cls_u32_offload cls_u32 = { }; struct stmmac_packet_attrs attr = { }; - struct tc_action **actions, *act; + struct tc_action **actions; struct tc_u32_sel *sel; + struct tcf_gact *gact; struct tcf_exts *exts; int ret, i, nk = 1; @@ -1104,14 +1105,14 @@ static int stmmac_test_rxp(struct stmmac_priv *priv) goto cleanup_sel; } - actions = kzalloc(nk * sizeof(*actions), GFP_KERNEL); + actions = kcalloc(nk, sizeof(*actions), GFP_KERNEL); if (!actions) { ret = -ENOMEM; goto cleanup_exts; } - act = kzalloc(nk * sizeof(*act), GFP_KERNEL); - if (!act) { + gact = kcalloc(nk, sizeof(*gact), GFP_KERNEL); + if (!gact) { ret = -ENOMEM; goto cleanup_actions; } @@ -1126,9 +1127,7 @@ static int stmmac_test_rxp(struct stmmac_priv *priv) exts->nr_actions = nk; exts->actions = actions; for (i = 0; i < nk; i++) { - struct tcf_gact *gact = to_gact(&act[i]); - - actions[i] = &act[i]; + actions[i] = (struct tc_action *)&gact[i]; gact->tcf_action = TC_ACT_SHOT; } @@ -1152,7 +1151,7 @@ static int stmmac_test_rxp(struct stmmac_priv *priv) stmmac_tc_setup_cls_u32(priv, priv, &cls_u32); cleanup_act: - kfree(act); + kfree(gact); cleanup_actions: kfree(actions); cleanup_exts: diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 0805edef5625..059d68d48f1e 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1716,6 +1716,7 @@ static int am65_cpsw_init_cpts(struct am65_cpsw_common *common) if (IS_ERR(cpts)) { int ret = PTR_ERR(cpts); + of_node_put(node); if (ret == -EOPNOTSUPP) { dev_info(dev, "cpts disabled\n"); return 0; @@ -2064,9 +2065,9 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) if (!node) return -ENOENT; common->port_num = of_get_child_count(node); + of_node_put(node); if (common->port_num < 1 || common->port_num > AM65_CPSW_MAX_PORTS) return -ENOENT; - of_node_put(node); if (common->port_num != 1) return -EOPNOTSUPP; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e3676386d0ee..18484370da0d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2629,7 +2629,10 @@ static int netvsc_suspend(struct hv_device *dev) /* Save the current config info */ ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev); - + if (!ndev_ctx->saved_netvsc_dev_info) { + ret = -ENOMEM; + goto out; + } ret = netvsc_detach(net, nvdev); out: rtnl_unlock(); diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 2a65efd3e8da..fe91b72eca36 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1209,9 +1209,10 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) struct gsi_event *event_done; struct gsi_event *event; struct gsi_trans *trans; + u32 trans_count = 0; u32 byte_count = 0; - u32 old_index; u32 event_avail; + u32 old_index; trans_info = &channel->trans_info; @@ -1232,6 +1233,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) do { trans->len = __le16_to_cpu(event->len); byte_count += trans->len; + trans_count++; /* Move on to the next event and transaction */ if (--event_avail) @@ -1243,7 +1245,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index) /* We record RX bytes when they are received */ channel->byte_count += byte_count; - channel->trans_count++; + channel->trans_count += trans_count; } /* Initialize a ring, including allocating DMA memory for its entries */ diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 621648ce750b..7e1208446e04 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -610,12 +610,14 @@ static void ipa_endpoint_init_aggr(struct ipa_endpoint *endpoint) if (endpoint->data->aggregation) { if (!endpoint->toward_ipa) { + u32 buffer_size; u32 limit; val |= u32_encode_bits(IPA_ENABLE_AGGR, AGGR_EN_FMASK); val |= u32_encode_bits(IPA_GENERIC, AGGR_TYPE_FMASK); - limit = ipa_aggr_size_kb(IPA_RX_BUFFER_SIZE); + buffer_size = IPA_RX_BUFFER_SIZE - NET_SKB_PAD; + limit = ipa_aggr_size_kb(buffer_size); val |= u32_encode_bits(limit, AGGR_BYTE_LIMIT_FMASK); limit = IPA_AGGR_TIME_LIMIT_DEFAULT; @@ -882,7 +884,7 @@ static int ipa_endpoint_replenish_one(struct ipa_endpoint *endpoint) err_trans_free: gsi_trans_free(trans); err_free_pages: - __free_pages(page, get_order(IPA_RX_BUFFER_SIZE)); + put_page(page); return -ENOMEM; } @@ -1177,7 +1179,7 @@ void ipa_endpoint_trans_release(struct ipa_endpoint *endpoint, struct page *page = trans->data; if (page) - __free_pages(page, get_order(IPA_RX_BUFFER_SIZE)); + put_page(page); } } diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 789a124809e3..70c5905a916b 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -240,6 +240,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) #define DEFAULT_SEND_SCI true #define DEFAULT_ENCRYPT false #define DEFAULT_ENCODING_SA 0 +#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1)) static bool send_sci(const struct macsec_secy *secy) { @@ -1694,7 +1695,7 @@ static bool validate_add_rxsa(struct nlattr **attrs) return false; if (attrs[MACSEC_SA_ATTR_PN] && - *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0) + nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { @@ -1750,7 +1751,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) } pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; - if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + if (tb_sa[MACSEC_SA_ATTR_PN] && + nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); @@ -1766,7 +1768,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), - MACSEC_SA_ATTR_SALT); + MACSEC_SALT_LEN); rtnl_unlock(); return -EINVAL; } @@ -1839,7 +1841,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) return 0; cleanup: - kfree(rx_sa); + macsec_rxsa_put(rx_sa); rtnl_unlock(); return err; } @@ -1936,7 +1938,7 @@ static bool validate_add_txsa(struct nlattr **attrs) if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; - if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) + if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { @@ -2008,7 +2010,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), - MACSEC_SA_ATTR_SALT); + MACSEC_SALT_LEN); rtnl_unlock(); return -EINVAL; } @@ -2082,7 +2084,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) cleanup: secy->operational = was_operational; - kfree(tx_sa); + macsec_txsa_put(tx_sa); rtnl_unlock(); return err; } @@ -2290,7 +2292,7 @@ static bool validate_upd_sa(struct nlattr **attrs) if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; - if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) + if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { @@ -3737,9 +3739,6 @@ static int macsec_changelink_common(struct net_device *dev, secy->operational = tx_sa && tx_sa->active; } - if (data[IFLA_MACSEC_WINDOW]) - secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); - if (data[IFLA_MACSEC_ENCRYPT]) tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]); @@ -3785,6 +3784,16 @@ static int macsec_changelink_common(struct net_device *dev, } } + if (data[IFLA_MACSEC_WINDOW]) { + secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); + + /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window + * for XPN cipher suites */ + if (secy->xpn && + secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW) + return -EINVAL; + } + return 0; } @@ -3814,7 +3823,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], ret = macsec_changelink_common(dev, data); if (ret) - return ret; + goto cleanup; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index 41e7c1432497..75a62d1cc737 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -34,6 +34,8 @@ #define MDIO_AN_VEND_PROV 0xc400 #define MDIO_AN_VEND_PROV_1000BASET_FULL BIT(15) #define MDIO_AN_VEND_PROV_1000BASET_HALF BIT(14) +#define MDIO_AN_VEND_PROV_5000BASET_FULL BIT(11) +#define MDIO_AN_VEND_PROV_2500BASET_FULL BIT(10) #define MDIO_AN_VEND_PROV_DOWNSHIFT_EN BIT(4) #define MDIO_AN_VEND_PROV_DOWNSHIFT_MASK GENMASK(3, 0) #define MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT 4 @@ -230,9 +232,20 @@ static int aqr_config_aneg(struct phy_device *phydev) phydev->advertising)) reg |= MDIO_AN_VEND_PROV_1000BASET_HALF; + /* Handle the case when the 2.5G and 5G speeds are not advertised */ + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_2500BASET_FULL; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_5000BASET_FULL; + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV, MDIO_AN_VEND_PROV_1000BASET_HALF | - MDIO_AN_VEND_PROV_1000BASET_FULL, reg); + MDIO_AN_VEND_PROV_1000BASET_FULL | + MDIO_AN_VEND_PROV_2500BASET_FULL | + MDIO_AN_VEND_PROV_5000BASET_FULL, reg); if (ret < 0) return ret; if (ret > 0) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 3d75b98f3051..3a8849716459 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -243,9 +243,7 @@ static int dp83822_config_intr(struct phy_device *phydev) if (misr_status < 0) return misr_status; - misr_status |= (DP83822_RX_ERR_HF_INT_EN | - DP83822_FALSE_CARRIER_HF_INT_EN | - DP83822_LINK_STAT_INT_EN | + misr_status |= (DP83822_LINK_STAT_INT_EN | DP83822_ENERGY_DET_INT_EN | DP83822_LINK_QUAL_INT_EN); diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index c716074fdef0..f86acad0aad4 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -137,6 +137,7 @@ #define DP83867_DOWNSHIFT_2_COUNT 2 #define DP83867_DOWNSHIFT_4_COUNT 4 #define DP83867_DOWNSHIFT_8_COUNT 8 +#define DP83867_SGMII_AUTONEG_EN BIT(7) /* CFG3 bits */ #define DP83867_CFG3_INT_OE BIT(7) @@ -802,6 +803,32 @@ static int dp83867_phy_reset(struct phy_device *phydev) DP83867_PHYCR_FORCE_LINK_GOOD, 0); } +static void dp83867_link_change_notify(struct phy_device *phydev) +{ + /* There is a limitation in DP83867 PHY device where SGMII AN is + * only triggered once after the device is booted up. Even after the + * PHY TPI is down and up again, SGMII AN is not triggered and + * hence no new in-band message from PHY to MAC side SGMII. + * This could cause an issue during power up, when PHY is up prior + * to MAC. At this condition, once MAC side SGMII is up, MAC side + * SGMII wouldn`t receive new in-band message from TI PHY with + * correct link status, speed and duplex info. + * Thus, implemented a SW solution here to retrigger SGMII Auto-Neg + * whenever there is a link change. + */ + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + int val = 0; + + val = phy_clear_bits(phydev, DP83867_CFG2, + DP83867_SGMII_AUTONEG_EN); + if (val < 0) + return; + + phy_set_bits(phydev, DP83867_CFG2, + DP83867_SGMII_AUTONEG_EN); + } +} + static struct phy_driver dp83867_driver[] = { { .phy_id = DP83867_PHY_ID, @@ -826,6 +853,8 @@ static struct phy_driver dp83867_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, + + .link_change_notify = dp83867_link_change_notify, }, }; module_phy_driver(dp83867_driver); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index c416ab1d2b00..c1cbdac4b376 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -1008,7 +1008,6 @@ int __init mdio_bus_init(void) return ret; } -EXPORT_SYMBOL_GPL(mdio_bus_init); #if IS_ENABLED(CONFIG_PHYLIB) void mdio_bus_exit(void) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 92e94ac94a34..bbbe198f83e8 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -283,7 +283,7 @@ static int kszphy_config_reset(struct phy_device *phydev) } } - if (priv->led_mode >= 0) + if (priv->type && priv->led_mode >= 0) kszphy_setup_led(phydev, priv->type->led_mode_reg, priv->led_mode); return 0; @@ -299,10 +299,10 @@ static int kszphy_config_init(struct phy_device *phydev) type = priv->type; - if (type->has_broadcast_disable) + if (type && type->has_broadcast_disable) kszphy_broadcast_disable(phydev); - if (type->has_nand_tree_disable) + if (type && type->has_nand_tree_disable) kszphy_nand_tree_disable(phydev); return kszphy_config_reset(phydev); @@ -1112,7 +1112,7 @@ static int kszphy_probe(struct phy_device *phydev) priv->type = type; - if (type->led_mode_reg) { + if (type && type->led_mode_reg) { ret = of_property_read_u32(np, "micrel,led-mode", &priv->led_mode); if (ret) @@ -1133,7 +1133,8 @@ static int kszphy_probe(struct phy_device *phydev) unsigned long rate = clk_get_rate(clk); bool rmii_ref_clk_sel_25_mhz; - priv->rmii_ref_clk_sel = type->has_rmii_ref_clk_sel; + if (type) + priv->rmii_ref_clk_sel = type->has_rmii_ref_clk_sel; rmii_ref_clk_sel_25_mhz = of_property_read_bool(np, "micrel,rmii-reference-clock-select-25-mhz"); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 96068e0d841a..dcbe278086dc 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2427,7 +2427,7 @@ static int sfp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sfp); - err = devm_add_action(sfp->dev, sfp_cleanup, sfp); + err = devm_add_action_or_reset(sfp->dev, sfp_cleanup, sfp); if (err < 0) return err; diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c index 291fa449993f..45f295403cb5 100644 --- a/drivers/net/sungem_phy.c +++ b/drivers/net/sungem_phy.c @@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy) int can_low_power = 1; if (np == NULL || of_get_property(np, "no-autolowpower", NULL)) can_low_power = 0; + of_node_put(np); if (can_low_power) { /* Enable automatic low-power */ sungem_phy_write(phy, 0x1c, 0x9002); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 55ce141c93c7..a643b2f2f4de 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -220,6 +220,9 @@ struct tun_struct { struct tun_prog __rcu *steering_prog; struct tun_prog __rcu *filter_prog; struct ethtool_link_ksettings link_ksettings; + /* init args */ + struct file *file; + struct ifreq *ifr; }; struct veth { @@ -227,6 +230,9 @@ struct veth { __be16 h_vlan_TCI; }; +static void tun_flow_init(struct tun_struct *tun); +static void tun_flow_uninit(struct tun_struct *tun); + static int tun_napi_receive(struct napi_struct *napi, int budget) { struct tun_file *tfile = container_of(napi, struct tun_file, napi); @@ -279,6 +285,12 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, } } +static void tun_napi_enable(struct tun_file *tfile) +{ + if (tfile->napi_enabled) + napi_enable(&tfile->napi); +} + static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) @@ -640,7 +652,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); if (tun && clean) { - tun_napi_disable(tfile); + if (!tfile->detached) + tun_napi_disable(tfile); tun_napi_del(tfile); } @@ -659,8 +672,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (clean) { RCU_INIT_POINTER(tfile->tun, NULL); sock_put(&tfile->sk); - } else + } else { tun_disable_queue(tun, tfile); + tun_napi_disable(tfile); + } synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); @@ -733,6 +748,7 @@ static void tun_detach_all(struct net_device *dev) sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_napi_del(tfile); tun_enable_queue(tfile); tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); @@ -813,6 +829,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, if (tfile->detached) { tun_enable_queue(tfile); + tun_napi_enable(tfile); } else { sock_hold(&tfile->sk); tun_napi_init(tun, tfile, napi, napi_frags); @@ -964,6 +981,49 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) static const struct ethtool_ops tun_ethtool_ops; +static int tun_net_init(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + struct ifreq *ifr = tun->ifr; + int err; + + tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); + if (!tun->pcpu_stats) + return -ENOMEM; + + spin_lock_init(&tun->lock); + + err = security_tun_dev_alloc_security(&tun->security); + if (err < 0) { + free_percpu(tun->pcpu_stats); + return err; + } + + tun_flow_init(tun); + + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | + TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + dev->features = dev->hw_features | NETIF_F_LLTX; + dev->vlan_features = dev->features & + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + tun->flags = (tun->flags & ~TUN_FEATURES) | + (ifr->ifr_flags & TUN_FEATURES); + + INIT_LIST_HEAD(&tun->disabled); + err = tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS, false); + if (err < 0) { + tun_flow_uninit(tun); + security_tun_dev_free_security(tun->security); + free_percpu(tun->pcpu_stats); + return err; + } + return 0; +} + /* Net device detach from fd. */ static void tun_net_uninit(struct net_device *dev) { @@ -1205,6 +1265,7 @@ static int tun_net_change_carrier(struct net_device *dev, bool new_carrier) } static const struct net_device_ops tun_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1285,6 +1346,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) } static const struct net_device_ops tap_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1325,7 +1387,7 @@ static void tun_flow_uninit(struct tun_struct *tun) #define MAX_MTU 65535 /* Initialize net device. */ -static void tun_net_init(struct net_device *dev) +static void tun_net_initialize(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -2257,10 +2319,6 @@ static void tun_free_netdev(struct net_device *dev) BUG_ON(!(list_empty(&tun->disabled))); free_percpu(tun->pcpu_stats); - /* We clear pcpu_stats so that tun_set_iff() can tell if - * tun_free_netdev() has been called from register_netdevice(). - */ - tun->pcpu_stats = NULL; tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); @@ -2773,41 +2831,16 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->rx_batched = 0; RCU_INIT_POINTER(tun->steering_prog, NULL); - tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); - if (!tun->pcpu_stats) { - err = -ENOMEM; - goto err_free_dev; - } + tun->ifr = ifr; + tun->file = file; - spin_lock_init(&tun->lock); - - err = security_tun_dev_alloc_security(&tun->security); - if (err < 0) - goto err_free_stat; - - tun_net_init(dev); - tun_flow_init(tun); - - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | - TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; - dev->features = dev->hw_features | NETIF_F_LLTX; - dev->vlan_features = dev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - - tun->flags = (tun->flags & ~TUN_FEATURES) | - (ifr->ifr_flags & TUN_FEATURES); - - INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS, false); - if (err < 0) - goto err_free_flow; + tun_net_initialize(dev); err = register_netdevice(tun->dev); - if (err < 0) - goto err_detach; + if (err < 0) { + free_netdev(dev); + return err; + } /* free_netdev() won't check refcnt, to aovid race * with dev_put() we need publish tun after registration. */ @@ -2824,24 +2857,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) strcpy(ifr->ifr_name, tun->dev->name); return 0; - -err_detach: - tun_detach_all(dev); - /* We are here because register_netdevice() has failed. - * If register_netdevice() already called tun_free_netdev() - * while dealing with the error, tun->pcpu_stats has been cleared. - */ - if (!tun->pcpu_stats) - goto err_free_dev; - -err_free_flow: - tun_flow_uninit(tun); - security_tun_dev_free_security(tun->security); -err_free_stat: - free_percpu(tun->pcpu_stats); -err_free_dev: - free_netdev(dev); - return err; } static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 0b0cbcee1920..0ac4f59e3f18 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1471,6 +1471,42 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) * are bundled into this buffer and where we can find an array of * per-packet metadata (which contains elements encoded into u16). */ + + /* SKB contents for current firmware: + * + * ... + * + * + * ... + * + * + * + * where: + * contains pkt_len bytes: + * 2 bytes of IP alignment pseudo header + * packet received + * contains 4 bytes: + * pkt_len and fields AX_RXHDR_* + * 0-7 bytes to terminate at + * 8 bytes boundary (64-bit). + * 4 bytes to make rx_hdr terminate at + * 8 bytes boundary (64-bit) + * contains 4 bytes: + * pkt_len=0 and AX_RXHDR_DROP_ERR + * contains 4 bytes: + * pkt_cnt and hdr_off (offset of + * ) + * + * pkt_cnt is number of entrys in the per-packet metadata. + * In current firmware there is 2 entrys per packet. + * The first points to the packet and the + * second is a dummy header. + * This was done probably to align fields in 64-bit and + * maintain compatibility with old firmware. + * This code assumes that and are + * optional. + */ + if (skb->len < 4) return 0; skb_trim(skb, skb->len - 4); @@ -1484,51 +1520,66 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* Make sure that the bounds of the metadata array are inside the SKB * (and in front of the counter at the end). */ - if (pkt_cnt * 2 + hdr_off > skb->len) + if (pkt_cnt * 4 + hdr_off > skb->len) return 0; pkt_hdr = (u32 *)(skb->data + hdr_off); /* Packets must not overlap the metadata array */ skb_trim(skb, hdr_off); - for (; ; pkt_cnt--, pkt_hdr++) { + for (; pkt_cnt > 0; pkt_cnt--, pkt_hdr++) { + u16 pkt_len_plus_padd; u16 pkt_len; le32_to_cpus(pkt_hdr); pkt_len = (*pkt_hdr >> 16) & 0x1fff; + pkt_len_plus_padd = (pkt_len + 7) & 0xfff8; - if (pkt_len > skb->len) + /* Skip dummy header used for alignment + */ + if (pkt_len == 0) + continue; + + if (pkt_len_plus_padd > skb->len) return 0; /* Check CRC or runt packet */ - if (((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) == 0) && - pkt_len >= 2 + ETH_HLEN) { - bool last = (pkt_cnt == 0); - - if (last) { - ax_skb = skb; - } else { - ax_skb = skb_clone(skb, GFP_ATOMIC); - if (!ax_skb) - return 0; - } - ax_skb->len = pkt_len; - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - skb_set_tail_pointer(ax_skb, ax_skb->len); - ax_skb->truesize = pkt_len + sizeof(struct sk_buff); - ax88179_rx_checksum(ax_skb, pkt_hdr); - - if (last) - return 1; - - usbnet_skb_return(dev, ax_skb); + if ((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) || + pkt_len < 2 + ETH_HLEN) { + dev->net->stats.rx_errors++; + skb_pull(skb, pkt_len_plus_padd); + continue; } - /* Trim this packet away from the SKB */ - if (!skb_pull(skb, (pkt_len + 7) & 0xFFF8)) + /* last packet */ + if (pkt_len_plus_padd == skb->len) { + skb_trim(skb, pkt_len); + + /* Skip IP alignment pseudo header */ + skb_pull(skb, 2); + + skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); + ax88179_rx_checksum(skb, pkt_hdr); + return 1; + } + + ax_skb = skb_clone(skb, GFP_ATOMIC); + if (!ax_skb) return 0; + skb_trim(ax_skb, pkt_len); + + /* Skip IP alignment pseudo header */ + skb_pull(ax_skb, 2); + + skb->truesize = pkt_len_plus_padd + + SKB_DATA_ALIGN(sizeof(struct sk_buff)); + ax88179_rx_checksum(ax_skb, pkt_hdr); + usbnet_skb_return(dev, ax_skb); + + skb_pull(skb, pkt_len_plus_padd); } + + return 0; } static struct sk_buff * @@ -1745,7 +1796,7 @@ static const struct driver_info ax88179_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1758,7 +1809,7 @@ static const struct driver_info ax88178a_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1771,7 +1822,7 @@ static const struct driver_info cypress_GX3_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1784,7 +1835,7 @@ static const struct driver_info dlink_dub1312_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1797,7 +1848,7 @@ static const struct driver_info sitecom_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1810,7 +1861,7 @@ static const struct driver_info samsung_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1823,7 +1874,7 @@ static const struct driver_info lenovo_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1836,7 +1887,7 @@ static const struct driver_info belkin_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1849,7 +1900,7 @@ static const struct driver_info toshiba_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1862,7 +1913,7 @@ static const struct driver_info mct_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 597766d14563..48e8b94e4a7c 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1293,6 +1293,8 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 402390b1a66b..58dd77efcaad 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1969,7 +1969,7 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (size) { - buf = kmalloc(size, GFP_KERNEL); + buf = kmalloc(size, GFP_NOIO); if (!buf) goto out; } @@ -2001,7 +2001,7 @@ static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (data) { - buf = kmemdup(data, size, GFP_KERNEL); + buf = kmemdup(data, size, GFP_NOIO); if (!buf) goto out; } else { @@ -2102,7 +2102,7 @@ static void usbnet_async_cmd_cb(struct urb *urb) int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, u16 value, u16 index, const void *data, u16 size) { - struct usb_ctrlrequest *req = NULL; + struct usb_ctrlrequest *req; struct urb *urb; int err = -ENOMEM; void *buf = NULL; @@ -2120,7 +2120,7 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, if (!buf) { netdev_err(dev->net, "Error allocating buffer" " in %s!\n", __func__); - goto fail_free; + goto fail_free_urb; } } @@ -2144,14 +2144,21 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, if (err < 0) { netdev_err(dev->net, "Error submitting the control" " message: status=%d\n", err); - goto fail_free; + goto fail_free_all; } return 0; +fail_free_all: + kfree(req); fail_free_buf: kfree(buf); -fail_free: - kfree(req); + /* + * avoid a double free + * needed because the flag can be set only + * after filling the URB + */ + urb->transfer_flags = 0; +fail_free_urb: usb_free_urb(urb); fail: return err; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1888876969f2..4df5f07ec38a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -213,9 +213,15 @@ struct virtnet_info { /* Packet virtio header size */ u8 hdr_len; - /* Work struct for refilling if we run low on memory. */ + /* Work struct for delayed refilling if we run low on memory. */ struct delayed_work refill; + /* Is delayed refill enabled? */ + bool refill_enabled; + + /* The lock to synchronize the access to refill_enabled */ + spinlock_t refill_lock; + /* Work struct for config space updates */ struct work_struct config_work; @@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) return p; } +static void enable_delayed_refill(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->refill_lock); + vi->refill_enabled = true; + spin_unlock_bh(&vi->refill_lock); +} + +static void disable_delayed_refill(struct virtnet_info *vi) +{ + spin_lock_bh(&vi->refill_lock); + vi->refill_enabled = false; + spin_unlock_bh(&vi->refill_lock); +} + static void virtqueue_napi_schedule(struct napi_struct *napi, struct virtqueue *vq) { @@ -1403,8 +1423,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget, } if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { - if (!try_fill_recv(vi, rq, GFP_ATOMIC)) - schedule_delayed_work(&vi->refill, 0); + if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { + spin_lock(&vi->refill_lock); + if (vi->refill_enabled) + schedule_delayed_work(&vi->refill, 0); + spin_unlock(&vi->refill_lock); + } } u64_stats_update_begin(&rq->stats.syncp); @@ -1523,6 +1547,8 @@ static int virtnet_open(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i, err; + enable_delayed_refill(vi); + for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs) /* Make sure we have some buffers: if oom use wq. */ @@ -1893,6 +1919,8 @@ static int virtnet_close(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i; + /* Make sure NAPI doesn't schedule refill work */ + disable_delayed_refill(vi); /* Make sure refill_work doesn't re-enable napi! */ cancel_delayed_work_sync(&vi->refill); @@ -2366,7 +2394,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = { static void virtnet_freeze_down(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int i; /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); @@ -2374,14 +2401,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev) netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); netif_tx_unlock_bh(vi->dev); - cancel_delayed_work_sync(&vi->refill); - - if (netif_running(vi->dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - napi_disable(&vi->rq[i].napi); - virtnet_napi_tx_disable(&vi->sq[i].napi); - } - } + if (netif_running(vi->dev)) + virtnet_close(vi->dev); } static int init_vqs(struct virtnet_info *vi); @@ -2389,7 +2410,7 @@ static int init_vqs(struct virtnet_info *vi); static int virtnet_restore_up(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int err, i; + int err; err = init_vqs(vi); if (err) @@ -2397,16 +2418,12 @@ static int virtnet_restore_up(struct virtio_device *vdev) virtio_device_ready(vdev); - if (netif_running(vi->dev)) { - for (i = 0; i < vi->curr_queue_pairs; i++) - if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); + enable_delayed_refill(vi); - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); - virtnet_napi_tx_enable(vi, vi->sq[i].vq, - &vi->sq[i].napi); - } + if (netif_running(vi->dev)) { + err = virtnet_open(vi->dev); + if (err) + return err; } netif_tx_lock_bh(vi->dev); @@ -3108,6 +3125,7 @@ static int virtnet_probe(struct virtio_device *vdev) vdev->priv = vi; INIT_WORK(&vi->config_work, virtnet_config_changed_work); + spin_lock_init(&vi->refill_lock); /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || @@ -3187,14 +3205,20 @@ static int virtnet_probe(struct virtio_device *vdev) } } - err = register_netdev(dev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + + err = register_netdevice(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); + rtnl_unlock(); goto free_failover; } virtio_device_ready(vdev); + rtnl_unlock(); + err = virtnet_cpu_notif_add(vi); if (err) { pr_debug("virtio_net: registering cpu notifier failed\n"); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 932a39945cc6..6678a734cc4d 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -595,6 +595,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { dev_kfree_skb_any(rbi->skb); + rbi->skb = NULL; rq->stats.rx_buf_alloc_failure++; break; } @@ -619,6 +620,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { put_page(rbi->page); + rbi->page = NULL; rq->stats.rx_buf_alloc_failure++; break; } @@ -1654,6 +1656,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, u32 i, ring_idx; struct Vmxnet3_RxDesc *rxd; + /* ring has already been cleaned up */ + if (!rq->rx_ring[0].base) + return; + for (ring_idx = 0; ring_idx < 2; ring_idx++) { for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { #ifdef __BIG_ENDIAN_BITFIELD diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c index c0cfd9b36c0b..720952b92e78 100644 --- a/drivers/net/wireguard/noise.c +++ b/drivers/net/wireguard/noise.c @@ -302,6 +302,41 @@ void wg_noise_set_static_identity_private_key( static_identity->static_public, private_key); } +static void hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, const size_t keylen) +{ + struct blake2s_state state; + u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; + u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); + int i; + + if (keylen > BLAKE2S_BLOCK_SIZE) { + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, key, keylen); + blake2s_final(&state, x_key); + } else + memcpy(x_key, key, keylen); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, in, inlen); + blake2s_final(&state, i_hash); + + for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) + x_key[i] ^= 0x5c ^ 0x36; + + blake2s_init(&state, BLAKE2S_HASH_SIZE); + blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); + blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); + blake2s_final(&state, i_hash); + + memcpy(out, i_hash, BLAKE2S_HASH_SIZE); + memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); + memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); +} + /* This is Hugo Krawczyk's HKDF: * - https://eprint.iacr.org/2010/264.pdf * - https://tools.ietf.org/html/rfc5869 @@ -322,14 +357,14 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, ((third_len || third_dst) && (!second_len || !second_dst)))); /* Extract entropy from data into secret */ - blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); + hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); if (!first_dst || !first_len) goto out; /* Expand first key: key = secret, data = 0x1 */ output[0] = 1; - blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); + hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); memcpy(first_dst, output, first_len); if (!second_dst || !second_len) @@ -337,8 +372,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, /* Expand second key: key = secret, data = first-key || 0x2 */ output[BLAKE2S_HASH_SIZE] = 2; - blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, - BLAKE2S_HASH_SIZE); + hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); memcpy(second_dst, output, second_len); if (!third_dst || !third_len) @@ -346,8 +380,7 @@ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, /* Expand third key: key = secret, data = second-key || 0x3 */ output[BLAKE2S_HASH_SIZE] = 3; - blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, - BLAKE2S_HASH_SIZE); + hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); memcpy(third_dst, output, third_len); out: diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 473221aa2236..eef5911fa210 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -49,7 +49,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, rt = dst_cache_get_ip4(cache, &fl.saddr); if (!rt) { - security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); + security_sk_classify_flow(sock, flowi4_to_flowi_common(&fl)); if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, fl.saddr, RT_SCOPE_HOST))) { endpoint->src4.s_addr = 0; @@ -129,7 +129,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, dst = dst_cache_get_ip6(cache, &fl.saddr); if (!dst) { - security_sk_classify_flow(sock, flowi6_to_flowi(&fl)); + security_sk_classify_flow(sock, flowi6_to_flowi_common(&fl)); if (unlikely(!ipv6_addr_any(&fl.saddr) && !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { endpoint->src6 = fl.saddr = in6addr_any; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index b59d482d9c23..b61cd275fbda 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5170,13 +5170,29 @@ static int ath10k_start(struct ieee80211_hw *hw) static void ath10k_stop(struct ieee80211_hw *hw) { struct ath10k *ar = hw->priv; + u32 opt; ath10k_drain_tx(ar); mutex_lock(&ar->conf_mutex); if (ar->state != ATH10K_STATE_OFF) { - if (!ar->hw_rfkill_on) - ath10k_halt(ar); + if (!ar->hw_rfkill_on) { + /* If the current driver state is RESTARTING but not yet + * fully RESTARTED because of incoming suspend event, + * then ath10k_halt() is already called via + * ath10k_core_restart() and should not be called here. + */ + if (ar->state != ATH10K_STATE_RESTARTING) { + ath10k_halt(ar); + } else { + /* Suspending here, because when in RESTARTING + * state, ath10k_core_stop() skips + * ath10k_wait_for_suspend(). + */ + opt = WMI_PDEV_SUSPEND_AND_DISABLE_INTR; + ath10k_wait_for_suspend(ar, opt); + } + } ar->state = ATH10K_STATE_OFF; } mutex_unlock(&ar->conf_mutex); diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index cc9122f42024..44282aec069d 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -4008,8 +4008,8 @@ static void ath11k_mgmt_over_wmi_tx_work(struct work_struct *work) } arvif = ath11k_vif_to_arvif(skb_cb->vif); - if (ar->allocated_vdev_map & (1LL << arvif->vdev_id) && - arvif->is_started) { + mutex_lock(&ar->conf_mutex); + if (ar->allocated_vdev_map & (1LL << arvif->vdev_id)) { ret = ath11k_mac_mgmt_tx_wmi(ar, arvif, skb); if (ret) { ath11k_warn(ar->ab, "failed to tx mgmt frame, vdev_id %d :%d\n", @@ -4025,6 +4025,7 @@ static void ath11k_mgmt_over_wmi_tx_work(struct work_struct *work) arvif->is_started); ieee80211_free_txskb(ar->hw, skb); } + mutex_unlock(&ar->conf_mutex); } } @@ -5325,6 +5326,7 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, struct ath11k *ar = hw->priv; struct ath11k_base *ab = ar->ab; struct ath11k_vif *arvif = (void *)vif->drv_priv; + struct ath11k_peer *peer; int ret; mutex_lock(&ar->conf_mutex); @@ -5336,9 +5338,13 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, WARN_ON(!arvif->is_started); if (ab->hw_params.vdev_start_delay && - arvif->vdev_type == WMI_VDEV_TYPE_MONITOR && - ath11k_peer_find_by_addr(ab, ar->mac_addr)) - ath11k_peer_delete(ar, arvif->vdev_id, ar->mac_addr); + arvif->vdev_type == WMI_VDEV_TYPE_MONITOR) { + spin_lock_bh(&ab->base_lock); + peer = ath11k_peer_find_by_addr(ab, ar->mac_addr); + spin_unlock_bh(&ab->base_lock); + if (peer) + ath11k_peer_delete(ar, arvif->vdev_id, ar->mac_addr); + } ret = ath11k_mac_vdev_stop(arvif); if (ret) diff --git a/drivers/net/wireless/ath/ath11k/spectral.c b/drivers/net/wireless/ath/ath11k/spectral.c index ac2a8cfdc1c0..f5ab455ea1a2 100644 --- a/drivers/net/wireless/ath/ath11k/spectral.c +++ b/drivers/net/wireless/ath/ath11k/spectral.c @@ -214,7 +214,10 @@ static int ath11k_spectral_scan_config(struct ath11k *ar, return -ENODEV; arvif->spectral_enabled = (mode != ATH11K_SPECTRAL_DISABLED); + + spin_lock_bh(&ar->spectral.lock); ar->spectral.mode = mode; + spin_unlock_bh(&ar->spectral.lock); ret = ath11k_wmi_vdev_spectral_enable(ar, arvif->vdev_id, ATH11K_WMI_SPECTRAL_TRIGGER_CMD_CLEAR, @@ -829,9 +832,6 @@ static inline void ath11k_spectral_ring_free(struct ath11k *ar) { struct ath11k_spectral *sp = &ar->spectral; - if (!sp->enabled) - return; - ath11k_dbring_srng_cleanup(ar, &sp->rx_ring); ath11k_dbring_buf_cleanup(ar, &sp->rx_ring); } @@ -883,15 +883,16 @@ void ath11k_spectral_deinit(struct ath11k_base *ab) if (!sp->enabled) continue; - ath11k_spectral_debug_unregister(ar); - ath11k_spectral_ring_free(ar); + mutex_lock(&ar->conf_mutex); + ath11k_spectral_scan_config(ar, ATH11K_SPECTRAL_DISABLED); + mutex_unlock(&ar->conf_mutex); spin_lock_bh(&sp->lock); - - sp->mode = ATH11K_SPECTRAL_DISABLED; sp->enabled = false; - spin_unlock_bh(&sp->lock); + + ath11k_spectral_debug_unregister(ar); + ath11k_spectral_ring_free(ar); } } diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index b0a4ca3559fd..abed1effd95c 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -5615,7 +5615,7 @@ unsigned int ar9003_get_paprd_scale_factor(struct ath_hw *ah, static u8 ar9003_get_eepmisc(struct ath_hw *ah) { - return ah->eeprom.map4k.baseEepHeader.eepMisc; + return ah->eeprom.ar9300_eep.baseEepHeader.opCapFlags.eepMisc; } const struct eeprom_ops eep_ar9300_ops = { diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h index a171dbb29fbb..ad949eb02f3d 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h @@ -720,7 +720,7 @@ #define AR_CH0_TOP2 (AR_SREV_9300(ah) ? 0x1628c : \ (AR_SREV_9462(ah) ? 0x16290 : 0x16284)) #define AR_CH0_TOP2_XPABIASLVL (AR_SREV_9561(ah) ? 0x1e00 : 0xf000) -#define AR_CH0_TOP2_XPABIASLVL_S 12 +#define AR_CH0_TOP2_XPABIASLVL_S (AR_SREV_9561(ah) ? 9 : 12) #define AR_CH0_XTAL (AR_SREV_9300(ah) ? 0x16294 : \ ((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0x16298 : \ diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 0a1634238e67..6b45e63fae4b 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -281,6 +281,7 @@ struct ath9k_htc_rxbuf { struct ath9k_htc_rx { struct list_head rxbuf; spinlock_t rxbuflock; + bool initialized; }; #define ATH9K_HTC_TX_CLEANUP_INTERVAL 50 /* ms */ @@ -305,6 +306,7 @@ struct ath9k_htc_tx { DECLARE_BITMAP(tx_slot, MAX_TX_BUF_NUM); struct timer_list cleanup_timer; spinlock_t tx_lock; + bool initialized; }; struct ath9k_htc_tx_ctl { diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 0bdc4dcb7b8f..43a743ec9d9e 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -808,6 +808,11 @@ int ath9k_tx_init(struct ath9k_htc_priv *priv) skb_queue_head_init(&priv->tx.data_vi_queue); skb_queue_head_init(&priv->tx.data_vo_queue); skb_queue_head_init(&priv->tx.tx_failed); + + /* Allow ath9k_wmi_event_tasklet(WMI_TXSTATUS_EVENTID) to operate. */ + smp_wmb(); + priv->tx.initialized = true; + return 0; } @@ -1006,6 +1011,14 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, goto rx_next; } + if (rxstatus->rs_keyix >= ATH_KEYMAX && + rxstatus->rs_keyix != ATH9K_RXKEYIX_INVALID) { + ath_dbg(common, ANY, + "Invalid keyix, dropping (keyix: %d)\n", + rxstatus->rs_keyix); + goto rx_next; + } + /* Get the RX status information */ memset(rx_status, 0, sizeof(struct ieee80211_rx_status)); @@ -1125,6 +1138,10 @@ void ath9k_htc_rxep(void *drv_priv, struct sk_buff *skb, struct ath9k_htc_rxbuf *rxbuf = NULL, *tmp_buf = NULL; unsigned long flags; + /* Check if ath9k_rx_init() completed. */ + if (!data_race(priv->rx.initialized)) + goto err; + spin_lock_irqsave(&priv->rx.rxbuflock, flags); list_for_each_entry(tmp_buf, &priv->rx.rxbuf, list) { if (!tmp_buf->in_process) { @@ -1180,6 +1197,10 @@ int ath9k_rx_init(struct ath9k_htc_priv *priv) list_add_tail(&rxbuf->list, &priv->rx.rxbuf); } + /* Allow ath9k_htc_rxep() to operate. */ + smp_wmb(); + priv->rx.initialized = true; + return 0; err: diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c index fe29ad4b9023..f315c54bd3ac 100644 --- a/drivers/net/wireless/ath/ath9k/wmi.c +++ b/drivers/net/wireless/ath/ath9k/wmi.c @@ -169,6 +169,10 @@ void ath9k_wmi_event_tasklet(struct tasklet_struct *t) &wmi->drv_priv->fatal_work); break; case WMI_TXSTATUS_EVENTID: + /* Check if ath9k_tx_init() completed. */ + if (!data_race(priv->tx.initialized)) + break; + spin_lock_bh(&priv->tx.tx_lock); if (priv->tx.flags & ATH9K_HTC_OP_TX_DRAIN) { spin_unlock_bh(&priv->tx.tx_lock); diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c index 235cf77cd60c..43050dc7b98d 100644 --- a/drivers/net/wireless/ath/carl9170/tx.c +++ b/drivers/net/wireless/ath/carl9170/tx.c @@ -1557,6 +1557,9 @@ static struct carl9170_vif_info *carl9170_pick_beaconing_vif(struct ar9170 *ar) goto out; } } while (ar->beacon_enabled && i--); + + /* no entry found in list */ + return NULL; } out: diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c index 665b737fbb0d..39975b7d1a16 100644 --- a/drivers/net/wireless/broadcom/b43/phy_n.c +++ b/drivers/net/wireless/broadcom/b43/phy_n.c @@ -582,7 +582,7 @@ static void b43_nphy_adjust_lna_gain_table(struct b43_wldev *dev) u16 data[4]; s16 gain[2]; u16 minmax[2]; - static const u16 lna_gain[4] = { -2, 10, 19, 25 }; + static const s16 lna_gain[4] = { -2, 10, 19, 25 }; if (nphy->hang_avoid) b43_nphy_stay_in_carrier_search(dev, 1); diff --git a/drivers/net/wireless/broadcom/b43legacy/phy.c b/drivers/net/wireless/broadcom/b43legacy/phy.c index 05404fbd1e70..c1395e622759 100644 --- a/drivers/net/wireless/broadcom/b43legacy/phy.c +++ b/drivers/net/wireless/broadcom/b43legacy/phy.c @@ -1123,7 +1123,7 @@ void b43legacy_phy_lo_b_measure(struct b43legacy_wldev *dev) struct b43legacy_phy *phy = &dev->phy; u16 regstack[12] = { 0 }; u16 mls; - u16 fval; + s16 fval; int i; int j; diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c index d9baa2fa603b..e4c60caa6543 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c @@ -383,7 +383,7 @@ netdev_tx_t libipw_xmit(struct sk_buff *skb, struct net_device *dev) /* Each fragment may need to have room for encryption * pre/postfix */ - if (host_encrypt) + if (host_encrypt && crypt && crypt->ops) bytes_per_frag -= crypt->ops->extra_mpdu_prefix_len + crypt->ops->extra_mpdu_postfix_len; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c index c146303ec73b..66a968de3bc2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c @@ -621,6 +621,9 @@ static void iwl_mvm_power_get_vifs_iterator(void *_data, u8 *mac, struct iwl_power_vifs *power_iterator = _data; bool active = mvmvif->phy_ctxt && mvmvif->phy_ctxt->id < NUM_PHY_CTX; + if (!mvmvif->uploaded) + return; + switch (ieee80211_vif_type_p2p(vif)) { case NL80211_IFTYPE_P2P_DEVICE: break; diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index d2ee6469e67b..3fa25cd64cda 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -303,5 +303,7 @@ void mwifiex_dfs_chan_sw_work_queue(struct work_struct *work) mwifiex_dbg(priv->adapter, MSG, "indicating channel switch completion to kernel\n"); + mutex_lock(&priv->wdev.mtx); cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef); + mutex_unlock(&priv->wdev.mtx); } diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c index 6bcc4a13ae6c..cc772045d526 100644 --- a/drivers/net/wireless/mediatek/mt7601u/usb.c +++ b/drivers/net/wireless/mediatek/mt7601u/usb.c @@ -26,6 +26,7 @@ static const struct usb_device_id mt7601u_device_table[] = { { USB_DEVICE(0x2717, 0x4106) }, { USB_DEVICE(0x2955, 0x0001) }, { USB_DEVICE(0x2955, 0x1001) }, + { USB_DEVICE(0x2955, 0x1003) }, { USB_DEVICE(0x2a5f, 0x1000) }, { USB_DEVICE(0x7392, 0x7710) }, { 0, } diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c index 2477e18c7cae..025619cd14e8 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c @@ -460,8 +460,10 @@ static void rtl8180_tx(struct ieee80211_hw *dev, struct rtl8180_priv *priv = dev->priv; struct rtl8180_tx_ring *ring; struct rtl8180_tx_desc *entry; + unsigned int prio = 0; unsigned long flags; - unsigned int idx, prio, hw_prio; + unsigned int idx, hw_prio; + dma_addr_t mapping; u32 tx_flags; u8 rc_flags; @@ -470,7 +472,9 @@ static void rtl8180_tx(struct ieee80211_hw *dev, /* do arithmetic and then convert to le16 */ u16 frame_duration = 0; - prio = skb_get_queue_mapping(skb); + /* rtl8180/rtl8185 only has one useable tx queue */ + if (dev->queues > IEEE80211_AC_BK) + prio = skb_get_queue_mapping(skb); ring = &priv->tx_ring[prio]; mapping = dma_map_single(&priv->pdev->dev, skb->data, skb->len, diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 06e073defad6..c6e4fda7e431 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1015,7 +1015,7 @@ int rtl_usb_probe(struct usb_interface *intf, hw = ieee80211_alloc_hw(sizeof(struct rtl_priv) + sizeof(struct rtl_usb_priv), &rtl_ops); if (!hw) { - WARN_ONCE(true, "rtl_usb: ieee80211 alloc failed\n"); + pr_warn("rtl_usb: ieee80211 alloc failed\n"); return -ENOMEM; } rtlpriv = hw->priv; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index dbac4c03d21a..a0335407be42 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -495,6 +495,7 @@ void xenvif_rx_action(struct xenvif_queue *queue) queue->rx_copy.completed = &completed_skbs; while (xenvif_rx_ring_slots_available(queue) && + !skb_queue_empty(&queue->rx_queue) && work_done < RX_BATCH_SIZE) { xenvif_rx_skb(queue); work_done++; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 1a69b5246133..569f3c8e7b75 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -66,6 +66,10 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644); MODULE_PARM_DESC(max_queues, "Maximum number of queues per virtual interface"); +static bool __read_mostly xennet_trusted = true; +module_param_named(trusted, xennet_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define XENNET_TIMEOUT (5 * HZ) static const struct ethtool_ops xennet_ethtool_ops; @@ -175,6 +179,9 @@ struct netfront_info { /* Is device behaving sane? */ bool broken; + /* Should skbs be bounced into a zeroed buffer? */ + bool bounce; + atomic_t rx_gso_checksum_fixup; }; @@ -273,7 +280,8 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) if (unlikely(!skb)) return NULL; - page = page_pool_dev_alloc_pages(queue->page_pool); + page = page_pool_alloc_pages(queue->page_pool, + GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO); if (unlikely(!page)) { kfree_skb(skb); return NULL; @@ -669,6 +677,33 @@ static int xennet_xdp_xmit(struct net_device *dev, int n, return n - drops; } +struct sk_buff *bounce_skb(const struct sk_buff *skb) +{ + unsigned int headerlen = skb_headroom(skb); + /* Align size to allocate full pages and avoid contiguous data leaks */ + unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len, + XEN_PAGE_SIZE); + struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO); + + if (!n) + return NULL; + + if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) { + WARN_ONCE(1, "misaligned skb allocated\n"); + kfree_skb(n); + return NULL; + } + + /* Set the data pointer */ + skb_reserve(n, headerlen); + /* Set the tail pointer and length */ + skb_put(n, skb->len); + + BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); + + skb_copy_header(n, skb); + return n; +} #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) @@ -722,9 +757,13 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev /* The first req should be at least ETH_HLEN size or the packet will be * dropped by netback. + * + * If the backend is not trusted bounce all data to zeroed pages to + * avoid exposing contiguous data on the granted page not belonging to + * the skb. */ - if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) { - nskb = skb_copy(skb, GFP_ATOMIC); + if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) { + nskb = bounce_skb(skb); if (!nskb) goto drop; dev_consume_skb_any(skb); @@ -1057,8 +1096,10 @@ static int xennet_get_responses(struct netfront_queue *queue, } } rcu_read_unlock(); -next: + __skb_queue_tail(list, skb); + +next: if (!(rx->flags & XEN_NETRXF_more_data)) break; @@ -2248,6 +2289,10 @@ static int talk_to_netback(struct xenbus_device *dev, info->netdev->irq = 0; + /* Check if backend is trusted. */ + info->bounce = !xennet_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + /* Check if backend supports multiple queues */ max_queues = xenbus_read_unsigned(info->xbdev->otherend, "multi-queue-max-queues", 1); @@ -2414,6 +2459,9 @@ static int xennet_connect(struct net_device *dev) return err; if (np->netback_has_xdp_headroom) pr_info("backend supports XDP headroom\n"); + if (np->bounce) + dev_info(&np->xbdev->dev, + "bouncing transmitted data to zeroed pages\n"); /* talk_to_netback() sets the correct number of queues */ num_queues = dev->real_num_tx_queues; diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 18cd96284b77..f81f1cae9324 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -186,9 +186,9 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, pdata->irq_polarity = IRQF_TRIGGER_RISING; ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index 8e0ddb434770..1f4120e3314b 100644 --- a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -129,9 +129,9 @@ static int nfcmrvl_spi_parse_dt(struct device_node *node, } ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c index 888e298f610b..f26986eb53f1 100644 --- a/drivers/nfc/nfcmrvl/usb.c +++ b/drivers/nfc/nfcmrvl/usb.c @@ -401,13 +401,25 @@ static void nfcmrvl_play_deferred(struct nfcmrvl_usb_drv_data *drv_data) int err; while ((urb = usb_get_from_anchor(&drv_data->deferred))) { + usb_anchor_urb(urb, &drv_data->tx_anchor); + err = usb_submit_urb(urb, GFP_ATOMIC); - if (err) + if (err) { + kfree(urb->setup_packet); + usb_unanchor_urb(urb); + usb_free_urb(urb); break; + } drv_data->tx_in_flight++; + usb_free_urb(urb); + } + + /* Cleanup the rest deferred urbs. */ + while ((urb = usb_get_from_anchor(&drv_data->deferred))) { + kfree(urb->setup_packet); + usb_free_urb(urb); } - usb_scuttle_anchored_urbs(&drv_data->deferred); } static int nfcmrvl_resume(struct usb_interface *intf) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 9f60e4dc5a90..f426dcdfcdd6 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -122,7 +122,9 @@ static int nxp_nci_i2c_fw_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN); r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len); - if (r != frame_len) { + if (r < 0) { + goto fw_read_exit_free_skb; + } else if (r != frame_len) { nfc_err(&client->dev, "Invalid frame length: %u (expected %zu)\n", r, frame_len); @@ -162,8 +164,13 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, (void *)&header, NCI_CTRL_HDR_SIZE); + if (!header.plen) + return 0; + r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen); - if (r != header.plen) { + if (r < 0) { + goto nci_read_exit_free_skb; + } else if (r != header.plen) { nfc_err(&client->dev, "Invalid frame payload length: %u (expected %u)\n", r, header.plen); diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index d2c011615775..8d7e29d953b7 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2844,13 +2844,14 @@ void pn53x_common_clean(struct pn533 *priv) { struct pn533_cmd *cmd, *n; + /* delete the timer before cleanup the worker */ + del_timer_sync(&priv->listen_timer); + flush_delayed_work(&priv->poll_work); destroy_workqueue(priv->wq); skb_queue_purge(&priv->resp_q); - del_timer(&priv->listen_timer); - list_for_each_entry_safe(cmd, n, &priv->cmd_queue, queue) { list_del(&cmd->queue); kfree(cmd); diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c index 0841e0e370a0..d41636504246 100644 --- a/drivers/nfc/st21nfca/se.c +++ b/drivers/nfc/st21nfca/se.c @@ -241,7 +241,7 @@ int st21nfca_hci_se_io(struct nfc_hci_dev *hdev, u32 se_idx, } EXPORT_SYMBOL(st21nfca_hci_se_io); -static void st21nfca_se_wt_timeout(struct timer_list *t) +static void st21nfca_se_wt_work(struct work_struct *work) { /* * No answer from the secure element @@ -254,8 +254,9 @@ static void st21nfca_se_wt_timeout(struct timer_list *t) */ /* hardware reset managed through VCC_UICC_OUT power supply */ u8 param = 0x01; - struct st21nfca_hci_info *info = from_timer(info, t, - se_info.bwi_timer); + struct st21nfca_hci_info *info = container_of(work, + struct st21nfca_hci_info, + se_info.timeout_work); pr_debug("\n"); @@ -273,6 +274,13 @@ static void st21nfca_se_wt_timeout(struct timer_list *t) info->se_info.cb(info->se_info.cb_context, NULL, 0, -ETIME); } +static void st21nfca_se_wt_timeout(struct timer_list *t) +{ + struct st21nfca_hci_info *info = from_timer(info, t, se_info.bwi_timer); + + schedule_work(&info->se_info.timeout_work); +} + static void st21nfca_se_activation_timeout(struct timer_list *t) { struct st21nfca_hci_info *info = from_timer(info, t, @@ -296,6 +304,8 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, int r = 0; struct device *dev = &hdev->ndev->dev; struct nfc_evt_transaction *transaction; + u32 aid_len; + u8 params_len; pr_debug("connectivity gate event: %x\n", event); @@ -304,43 +314,48 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, r = nfc_se_connectivity(hdev->ndev, host); break; case ST21NFCA_EVT_TRANSACTION: - /* - * According to specification etsi 102 622 + /* According to specification etsi 102 622 * 11.2.2.4 EVT_TRANSACTION Table 52 * Description Tag Length * AID 81 5 to 16 * PARAMETERS 82 0 to 255 + * + * The key differences are aid storage length is variably sized + * in the packet, but fixed in nfc_evt_transaction, and that the aid_len + * is u8 in the packet, but u32 in the structure, and the tags in + * the packet are not included in nfc_evt_transaction. + * + * size in bytes: 1 1 5-16 1 1 0-255 + * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4 + * member name: aid_tag(M) aid_len aid params_tag(M) params_len params + * example: 0x81 5-16 X 0x82 0-255 X */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && - skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) + if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) return -EPROTO; - transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL); + aid_len = skb->data[1]; + + if (skb->len < aid_len + 4 || aid_len > sizeof(transaction->aid)) + return -EPROTO; + + params_len = skb->data[aid_len + 3]; + + /* Verify PARAMETERS tag is (82), and final check that there is enough + * space in the packet to read everything. + */ + if ((skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG) || + (skb->len < aid_len + 4 + params_len)) + return -EPROTO; + + transaction = devm_kzalloc(dev, sizeof(*transaction) + params_len, GFP_KERNEL); if (!transaction) return -ENOMEM; - transaction->aid_len = skb->data[1]; + transaction->aid_len = aid_len; + transaction->params_len = params_len; - /* Checking if the length of the AID is valid */ - if (transaction->aid_len > sizeof(transaction->aid)) - return -EINVAL; - - memcpy(transaction->aid, &skb->data[2], - transaction->aid_len); - - /* Check next byte is PARAMETERS tag (82) */ - if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) - return -EPROTO; - - transaction->params_len = skb->data[transaction->aid_len + 3]; - - /* Total size is allocated (skb->len - 2) minus fixed array members */ - if (transaction->params_len > ((skb->len - 2) - sizeof(struct nfc_evt_transaction))) - return -EINVAL; - - memcpy(transaction->params, skb->data + - transaction->aid_len + 4, transaction->params_len); + memcpy(transaction->aid, &skb->data[2], aid_len); + memcpy(transaction->params, &skb->data[aid_len + 4], params_len); r = nfc_se_transaction(hdev->ndev, host, transaction); break; @@ -364,6 +379,7 @@ int st21nfca_apdu_reader_event_received(struct nfc_hci_dev *hdev, switch (event) { case ST21NFCA_EVT_TRANSMIT_DATA: del_timer_sync(&info->se_info.bwi_timer); + cancel_work_sync(&info->se_info.timeout_work); info->se_info.bwi_active = false; r = nfc_hci_send_event(hdev, ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0); @@ -393,6 +409,7 @@ void st21nfca_se_init(struct nfc_hci_dev *hdev) struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); init_completion(&info->se_info.req_completion); + INIT_WORK(&info->se_info.timeout_work, st21nfca_se_wt_work); /* initialize timers */ timer_setup(&info->se_info.bwi_timer, st21nfca_se_wt_timeout, 0); info->se_info.bwi_active = false; @@ -420,6 +437,7 @@ void st21nfca_se_deinit(struct nfc_hci_dev *hdev) if (info->se_info.se_active) del_timer_sync(&info->se_info.se_active_timer); + cancel_work_sync(&info->se_info.timeout_work); info->se_info.bwi_active = false; info->se_info.se_active = false; } diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h index 5e0de0fef1d4..0e4a93d11efb 100644 --- a/drivers/nfc/st21nfca/st21nfca.h +++ b/drivers/nfc/st21nfca/st21nfca.h @@ -141,6 +141,7 @@ struct st21nfca_se_info { se_io_cb_t cb; void *cb_context; + struct work_struct timeout_work; }; struct st21nfca_hci_info { diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2304c6183822..9ec59960f216 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -187,8 +187,8 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data) ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1; /* make sure we are in the region */ - if (ctx->phys < nd_region->ndr_start - || (ctx->phys + ctx->cleared) > ndr_end) + if (ctx->phys < nd_region->ndr_start || + (ctx->phys + ctx->cleared - 1) > ndr_end) return 0; sector = (ctx->phys - nd_region->ndr_start) / 512; diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index c21ba0602029..1c92c883afdd 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -400,9 +400,7 @@ static ssize_t capability_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); switch (cap) { case NVDIMM_FWA_CAP_QUIESCE: @@ -427,10 +425,8 @@ static ssize_t activate_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); state = nd_desc->fw_ops->activate_state(nd_desc); - nvdimm_bus_unlock(dev); if (cap < NVDIMM_FWA_CAP_QUIESCE) return -EOPNOTSUPP; @@ -475,7 +471,6 @@ static ssize_t activate_store(struct device *dev, else return -EINVAL; - nvdimm_bus_lock(dev); state = nd_desc->fw_ops->activate_state(nd_desc); switch (state) { @@ -493,7 +488,6 @@ static ssize_t activate_store(struct device *dev, default: rc = -ENXIO; } - nvdimm_bus_unlock(dev); if (rc == 0) rc = len; @@ -516,10 +510,7 @@ static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribut if (!nd_desc->fw_ops) return 0; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); - if (cap < NVDIMM_FWA_CAP_QUIESCE) return 0; diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 4b80150e4afa..b5aa55c61461 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -379,11 +379,6 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) || !nvdimm->sec.flags) return -EOPNOTSUPP; - if (dev->driver == NULL) { - dev_dbg(dev, "Unable to overwrite while DIMM active.\n"); - return -EINVAL; - } - rc = check_security_state(nvdimm); if (rc) return rc; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ad4f1cfbad2e..ab060b4911ff 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -531,36 +531,54 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU); static inline void nvme_clear_nvme_request(struct request *req) { - if (!(req->rq_flags & RQF_DONTPREP)) { - nvme_req(req)->retries = 0; - nvme_req(req)->flags = 0; - req->rq_flags |= RQF_DONTPREP; - } + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; } -struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) +static inline unsigned int nvme_req_op(struct nvme_command *cmd) { - unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; - struct request *req; + return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; +} - if (qid == NVME_QID_ANY) { - req = blk_mq_alloc_request(q, op, flags); - } else { - req = blk_mq_alloc_request_hctx(q, op, flags, - qid ? qid - 1 : 0); - } - if (IS_ERR(req)) - return req; +static inline void nvme_init_request(struct request *req, + struct nvme_command *cmd) +{ + if (req->q->queuedata) + req->timeout = NVME_IO_TIMEOUT; + else /* no queuedata implies admin queue */ + req->timeout = ADMIN_TIMEOUT; req->cmd_flags |= REQ_FAILFAST_DRIVER; nvme_clear_nvme_request(req); nvme_req(req)->cmd = cmd; +} +struct request *nvme_alloc_request(struct request_queue *q, + struct nvme_command *cmd, blk_mq_req_flags_t flags) +{ + struct request *req; + + req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); + if (!IS_ERR(req)) + nvme_init_request(req, cmd); return req; } EXPORT_SYMBOL_GPL(nvme_alloc_request); +struct request *nvme_alloc_request_qid(struct request_queue *q, + struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) +{ + struct request *req; + + req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, + qid ? qid - 1 : 0); + if (!IS_ERR(req)) + nvme_init_request(req, cmd); + return req; +} +EXPORT_SYMBOL_GPL(nvme_alloc_request_qid); + static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) { struct nvme_command c; @@ -663,7 +681,7 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9; } -static void nvme_setup_passthrough(struct request *req, +static inline void nvme_setup_passthrough(struct request *req, struct nvme_command *cmd) { memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); @@ -834,7 +852,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; blk_status_t ret = BLK_STS_OK; - nvme_clear_nvme_request(req); + if (!(req->rq_flags & RQF_DONTPREP)) + nvme_clear_nvme_request(req); memset(cmd, 0, sizeof(*cmd)); switch (req_op(req)) { @@ -923,11 +942,15 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, struct request *req; int ret; - req = nvme_alloc_request(q, cmd, flags, qid); + if (qid == NVME_QID_ANY) + req = nvme_alloc_request(q, cmd, flags); + else + req = nvme_alloc_request_qid(q, cmd, flags, qid); if (IS_ERR(req)) return PTR_ERR(req); - req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + if (timeout) + req->timeout = timeout; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); @@ -1093,11 +1116,12 @@ static int nvme_submit_user_cmd(struct request_queue *q, void *meta = NULL; int ret; - req = nvme_alloc_request(q, cmd, 0, NVME_QID_ANY); + req = nvme_alloc_request(q, cmd, 0); if (IS_ERR(req)) return PTR_ERR(req); - req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + if (timeout) + req->timeout = timeout; nvme_req(req)->flags |= NVME_REQ_USERCMD; if (ubuffer && bufflen) { @@ -1167,8 +1191,8 @@ static int nvme_keep_alive(struct nvme_ctrl *ctrl) { struct request *rq; - rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED, - NVME_QID_ANY); + rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, + BLK_MQ_REQ_RESERVED); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -2024,7 +2048,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); - blk_queue_dma_alignment(q, 7); + blk_queue_dma_alignment(q, 3); blk_queue_write_cache(q, vwc, vwc); } @@ -2675,6 +2699,34 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x14a4, .fr = "22301111", .quirks = NVME_QUIRK_SIMPLE_SUSPEND, + }, + { + /* + * This Kioxia CD6-V Series / HPE PE8030 device times out and + * aborts I/O during any load, but more easily reproducible + * with discards (fstrim). + * + * The device is left in a state where it is also not possible + * to use "nvme set-feature" to disable APST, but booting with + * nvme_core.default_ps_max_latency=0 works. + */ + .vid = 0x1e0f, + .mn = "KCD6XVUL6T40", + .quirks = NVME_QUIRK_NO_APST, + }, + { + /* + * The external Samsung X5 SSD fails initialization without a + * delay before checking if it is ready and has a whole set of + * other problems. To make this even more interesting, it + * shares the PCI ID with internal Samsung 970 Evo Plus that + * does not need or want these quirks. + */ + .vid = 0x144d, + .mn = "Samsung Portable SSD X5", + .quirks = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, } }; @@ -2813,8 +2865,8 @@ static ssize_t subsys_##field##_show(struct device *dev, \ { \ struct nvme_subsystem *subsys = \ container_of(dev, struct nvme_subsystem, dev); \ - return sprintf(buf, "%.*s\n", \ - (int)sizeof(subsys->field), subsys->field); \ + return sysfs_emit(buf, "%.*s\n", \ + (int)sizeof(subsys->field), subsys->field); \ } \ static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show); @@ -3335,13 +3387,13 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, int model_len = sizeof(subsys->model); if (!uuid_is_null(&ids->uuid)) - return sprintf(buf, "uuid.%pU\n", &ids->uuid); + return sysfs_emit(buf, "uuid.%pU\n", &ids->uuid); if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - return sprintf(buf, "eui.%16phN\n", ids->nguid); + return sysfs_emit(buf, "eui.%16phN\n", ids->nguid); if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) - return sprintf(buf, "eui.%8phN\n", ids->eui64); + return sysfs_emit(buf, "eui.%8phN\n", ids->eui64); while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' || subsys->serial[serial_len - 1] == '\0')) @@ -3350,7 +3402,7 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, subsys->model[model_len - 1] == '\0')) model_len--; - return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, + return sysfs_emit(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, serial_len, subsys->serial, model_len, subsys->model, head->ns_id); } @@ -3359,7 +3411,7 @@ static DEVICE_ATTR_RO(wwid); static ssize_t nguid_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid); + return sysfs_emit(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid); } static DEVICE_ATTR_RO(nguid); @@ -3372,25 +3424,25 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, * we have no UUID set */ if (uuid_is_null(&ids->uuid)) { - printk_ratelimited(KERN_WARNING - "No UUID available providing old NGUID\n"); - return sprintf(buf, "%pU\n", ids->nguid); + dev_warn_ratelimited(dev, + "No UUID available providing old NGUID\n"); + return sysfs_emit(buf, "%pU\n", ids->nguid); } - return sprintf(buf, "%pU\n", &ids->uuid); + return sysfs_emit(buf, "%pU\n", &ids->uuid); } static DEVICE_ATTR_RO(uuid); static ssize_t eui_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64); + return sysfs_emit(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64); } static DEVICE_ATTR_RO(eui); static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev_to_ns_head(dev)->ns_id); + return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ns_id); } static DEVICE_ATTR_RO(nsid); @@ -3455,7 +3507,7 @@ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ - return sprintf(buf, "%.*s\n", \ + return sysfs_emit(buf, "%.*s\n", \ (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \ } \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); @@ -3469,7 +3521,7 @@ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ - return sprintf(buf, "%d\n", ctrl->field); \ + return sysfs_emit(buf, "%d\n", ctrl->field); \ } \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); @@ -3517,9 +3569,9 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && state_name[ctrl->state]) - return sprintf(buf, "%s\n", state_name[ctrl->state]); + return sysfs_emit(buf, "%s\n", state_name[ctrl->state]); - return sprintf(buf, "unknown state\n"); + return sysfs_emit(buf, "unknown state\n"); } static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL); @@ -3571,9 +3623,9 @@ static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev, struct nvmf_ctrl_options *opts = ctrl->opts; if (ctrl->opts->max_reconnects == -1) - return sprintf(buf, "off\n"); - return sprintf(buf, "%d\n", - opts->max_reconnects * opts->reconnect_delay); + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", + opts->max_reconnects * opts->reconnect_delay); } static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev, @@ -3603,8 +3655,8 @@ static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev, struct nvme_ctrl *ctrl = dev_get_drvdata(dev); if (ctrl->opts->reconnect_delay == -1) - return sprintf(buf, "off\n"); - return sprintf(buf, "%d\n", ctrl->opts->reconnect_delay); + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", ctrl->opts->reconnect_delay); } static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev, @@ -4408,6 +4460,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); + if (ctrl->ops->stop_ctrl) + ctrl->ops->stop_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); @@ -4420,6 +4474,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); + nvme_mpath_update(ctrl); } } EXPORT_SYMBOL_GPL(nvme_start_ctrl); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 8e562d0f2c30..470cef3abec3 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -653,7 +653,7 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q, nvme_nvm_rqtocmd(rqd, ns, cmd); - rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); + rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0); if (IS_ERR(rq)) return rq; @@ -767,14 +767,14 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q, DECLARE_COMPLETION_ONSTACK(wait); int ret = 0; - rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0, - NVME_QID_ANY); + rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0); if (IS_ERR(rq)) { ret = -ENOMEM; goto err_cmd; } - rq->timeout = timeout ? timeout : ADMIN_TIMEOUT; + if (timeout) + rq->timeout = timeout; if (ppa_buf && ppa_len) { ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 18a756444d5a..379d6818a063 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -484,8 +484,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); - - if (nvme_state_is_live(ns->ana_state)) + /* + * nvme_mpath_set_live() will trigger I/O to the multipath path device + * and in turn to this path device. However we cannot accept this I/O + * if the controller is not live. This may deadlock if called from + * nvme_mpath_init_identify() and the ctrl will never complete + * initialization, preventing I/O from completing. For this case we + * will reprocess the ANA log page in nvme_mpath_update() once the + * controller is ready. + */ + if (nvme_state_is_live(ns->ana_state) && + ns->ctrl->state == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); } @@ -572,6 +581,18 @@ static void nvme_ana_work(struct work_struct *work) nvme_read_ana_log(ctrl); } +void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ + u32 nr_change_groups = 0; + + if (!ctrl->ana_log_buf) + return; + + mutex_lock(&ctrl->ana_lock); + nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state); + mutex_unlock(&ctrl->ana_lock); +} + static void nvme_anatt_timeout(struct timer_list *t) { struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer); @@ -603,8 +624,8 @@ static ssize_t nvme_subsys_iopolicy_show(struct device *dev, struct nvme_subsystem *subsys = container_of(dev, struct nvme_subsystem, dev); - return sprintf(buf, "%s\n", - nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]); + return sysfs_emit(buf, "%s\n", + nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]); } static ssize_t nvme_subsys_iopolicy_store(struct device *dev, @@ -629,7 +650,7 @@ SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR, static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid); + return sysfs_emit(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid); } DEVICE_ATTR_RO(ana_grpid); @@ -638,7 +659,7 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%s\n", nvme_ana_state_names[ns->ana_state]); + return sysfs_emit(buf, "%s\n", nvme_ana_state_names[ns->ana_state]); } DEVICE_ATTR_RO(ana_state); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 10e5ae3a8c0d..58cf9e39d613 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -478,6 +478,7 @@ struct nvme_ctrl_ops { void (*free_ctrl)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl); void (*delete_ctrl)(struct nvme_ctrl *ctrl); + void (*stop_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); }; @@ -662,6 +663,8 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, + struct nvme_command *cmd, blk_mq_req_flags_t flags); +struct request *nvme_alloc_request_qid(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid); void nvme_cleanup_cmd(struct request *req); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, @@ -712,6 +715,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); +void nvme_mpath_update(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); @@ -798,6 +802,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); return 0; } +static inline void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ +} static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6939b03a16c5..ce129655ef0a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -224,6 +224,7 @@ struct nvme_queue { */ struct nvme_iod { struct nvme_request req; + struct nvme_command cmd; struct nvme_queue *nvmeq; bool use_sgl; int aborted; @@ -917,7 +918,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_command cmnd; + struct nvme_command *cmnd = &iod->cmd; blk_status_t ret; iod->aborted = 0; @@ -931,24 +932,24 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags))) return BLK_STS_IOERR; - ret = nvme_setup_cmd(ns, req, &cmnd); + ret = nvme_setup_cmd(ns, req, cmnd); if (ret) return ret; if (blk_rq_nr_phys_segments(req)) { - ret = nvme_map_data(dev, req, &cmnd); + ret = nvme_map_data(dev, req, cmnd); if (ret) goto out_free_cmd; } if (blk_integrity_rq(req)) { - ret = nvme_map_metadata(dev, req, &cmnd); + ret = nvme_map_metadata(dev, req, cmnd); if (ret) goto out_unmap_data; } blk_mq_start_request(req); - nvme_submit_cmd(nvmeq, &cmnd, bd->last); + nvme_submit_cmd(nvmeq, cmnd, bd->last); return BLK_STS_OK; out_unmap_data: nvme_unmap_data(dev, req); @@ -1350,13 +1351,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) req->tag, nvmeq->qid); abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd, - BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); + BLK_MQ_REQ_NOWAIT); if (IS_ERR(abort_req)) { atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } - abort_req->timeout = ADMIN_TIMEOUT; abort_req->end_io_data = NULL; blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio); @@ -1666,6 +1666,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { blk_mq_free_tag_set(&dev->admin_tagset); + dev->ctrl.admin_q = NULL; return -ENOMEM; } if (!blk_get_queue(dev->ctrl.admin_q)) { @@ -2278,11 +2279,10 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) cmd.delete_queue.opcode = opcode; cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid); - req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); + req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT); if (IS_ERR(req)) return PTR_ERR(req); - req->timeout = ADMIN_TIMEOUT; req->end_io_data = nvmeq; init_completion(&nvmeq->delete_done); @@ -3234,7 +3234,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DISABLE_WRITE_ZEROES| NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ - .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, @@ -3245,7 +3246,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ - .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, @@ -3265,7 +3267,6 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS | NVME_QUIRK_SKIP_CID_GEN }, - { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } }; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 8eacc9bd58f5..b61924394032 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1057,6 +1057,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, } } +static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + + cancel_work_sync(&ctrl->err_work); + cancel_delayed_work_sync(&ctrl->reconnect_work); +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -2236,9 +2244,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&ctrl->err_work); - cancel_delayed_work_sync(&ctrl->reconnect_work); - nvme_rdma_teardown_io_queues(ctrl, shutdown); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); if (shutdown) @@ -2288,6 +2293,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_rdma_stop_ctrl, }; /* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 7e3932033707..fe8c27bbc3f2 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1149,8 +1149,7 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) } else if (ret < 0) { dev_err(queue->ctrl->ctrl.device, "failed to send request %d\n", ret); - if (ret != -EPIPE && ret != -ECONNRESET) - nvme_tcp_fail_request(queue->request); + nvme_tcp_fail_request(queue->request); nvme_tcp_done_send_req(queue); } return ret; @@ -2136,9 +2135,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); - cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); - nvme_tcp_teardown_io_queues(ctrl, shutdown); blk_mq_quiesce_queue(ctrl->admin_q); if (shutdown) @@ -2178,6 +2174,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work) nvme_tcp_reconnect_or_remove(ctrl); } +static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) +{ + cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); + cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); +} + static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); @@ -2500,6 +2502,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .submit_async_event = nvme_tcp_submit_async_event, .delete_ctrl = nvme_tcp_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_tcp_stop_ctrl, }; static bool diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 8ee94f056898..d24251ece502 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -244,7 +244,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) q = ns->queue; } - rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY); + rq = nvme_alloc_request(q, req->cmd, 0); if (IS_ERR(rq)) { status = NVME_SC_INTERNAL; goto out_put_ns; diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 43a77d720008..c8a0c0e9dec1 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -170,9 +170,7 @@ static int overlay_notify(struct overlay_changeset *ovcs, ret = blocking_notifier_call_chain(&overlay_notify_chain, action, &nd); - if (ret == NOTIFY_OK || ret == NOTIFY_STOP) - return 0; - if (ret) { + if (notifier_to_errno(ret)) { ret = notifier_to_errno(ret); pr_err("overlay changeset %s notifier error %d, target: %pOF\n", of_overlay_action_name[action], ret, nd.target); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 446862368949..b163f5d13910 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -346,11 +346,11 @@ static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table) /* Checking only first OPP is sufficient */ np = of_get_next_available_child(opp_np, NULL); + of_node_put(opp_np); if (!np) { dev_err(dev, "OPP table empty\n"); return -EINVAL; } - of_node_put(opp_np); prop = of_find_property(np, "opp-peak-kBps", NULL); of_node_put(np); diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c index 1af14474abcf..4c5e6349d78c 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-ep.c +++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c @@ -154,8 +154,7 @@ static int cdns_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, phys_addr_t addr, struct cdns_pcie *pcie = &ep->pcie; u32 r; - r = find_first_zero_bit(&ep->ob_region_map, - sizeof(ep->ob_region_map) * BITS_PER_LONG); + r = find_first_zero_bit(&ep->ob_region_map, BITS_PER_LONG); if (r >= ep->max_regions - 1) { dev_err(&epc->dev, "no free outbound region\n"); return -EINVAL; diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 5cf1ef12fb9b..ceb4815379cd 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -401,6 +401,11 @@ static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie) dev_err(dev, "failed to disable vpcie regulator: %d\n", ret); } + + /* Some boards don't have PCIe reset GPIO. */ + if (gpio_is_valid(imx6_pcie->reset_gpio)) + gpio_set_value_cansleep(imx6_pcie->reset_gpio, + imx6_pcie->gpio_active_high); } static unsigned int imx6_pcie_grp_offset(const struct imx6_pcie *imx6_pcie) @@ -523,15 +528,6 @@ static void imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie) /* allow the clocks to stabilize */ usleep_range(200, 500); - /* Some boards don't have PCIe reset GPIO. */ - if (gpio_is_valid(imx6_pcie->reset_gpio)) { - gpio_set_value_cansleep(imx6_pcie->reset_gpio, - imx6_pcie->gpio_active_high); - msleep(100); - gpio_set_value_cansleep(imx6_pcie->reset_gpio, - !imx6_pcie->gpio_active_high); - } - switch (imx6_pcie->drvdata->variant) { case IMX8MQ: reset_control_deassert(imx6_pcie->pciephy_reset); @@ -574,6 +570,15 @@ static void imx6_pcie_deassert_core_reset(struct imx6_pcie *imx6_pcie) break; } + /* Some boards don't have PCIe reset GPIO. */ + if (gpio_is_valid(imx6_pcie->reset_gpio)) { + msleep(100); + gpio_set_value_cansleep(imx6_pcie->reset_gpio, + !imx6_pcie->gpio_active_high); + /* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */ + msleep(100); + } + return; err_ref_clk: diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index dc0d135e81df..cd34f1c4f77d 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -393,7 +393,8 @@ int dw_pcie_host_init(struct pcie_port *pp) sizeof(pp->msi_msg), DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(pci->dev, pp->msi_data)) { + ret = dma_mapping_error(pci->dev, pp->msi_data); + if (ret) { dev_err(pci->dev, "Failed to map MSI data\n"); pp->msi_data = 0; goto err_free_msi; diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 557554f53ce9..1b8b3c12eece 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1192,12 +1192,6 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie) goto err_disable_clocks; } - ret = clk_prepare_enable(res->pipe_clk); - if (ret) { - dev_err(dev, "cannot prepare/enable pipe clock\n"); - goto err_disable_clocks; - } - /* configure PCIe to RC mode */ writel(DEVICE_TYPE_RC, pcie->parf + PCIE20_PARF_DEVICE_TYPE); @@ -1443,22 +1437,21 @@ static int qcom_pcie_probe(struct platform_device *pdev) } ret = phy_init(pcie->phy); - if (ret) { - pm_runtime_disable(&pdev->dev); + if (ret) goto err_pm_runtime_put; - } platform_set_drvdata(pdev, pcie); ret = dw_pcie_host_init(pp); if (ret) { dev_err(dev, "cannot initialize host\n"); - pm_runtime_disable(&pdev->dev); - goto err_pm_runtime_put; + goto err_phy_exit; } return 0; +err_phy_exit: + phy_exit(pcie->phy); err_pm_runtime_put: pm_runtime_put(dev); pm_runtime_disable(dev); diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index a070e69bb49c..4353443b89d8 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1118,6 +1118,10 @@ static void hv_int_desc_free(struct hv_pci_dev *hpdev, u8 buffer[sizeof(struct pci_delete_interrupt)]; } ctxt; + if (!int_desc->vector_count) { + kfree(int_desc); + return; + } memset(&ctxt, 0, sizeof(ctxt)); int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message; int_pkt->message_type.type = @@ -1180,6 +1184,28 @@ static void hv_irq_mask(struct irq_data *data) pci_msi_mask_irq(data); } +static unsigned int hv_msi_get_int_vector(struct irq_data *data) +{ + struct irq_cfg *cfg = irqd_cfg(data); + + return cfg->vector; +} + +static int hv_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *info) +{ + int ret = pci_msi_prepare(domain, dev, nvec, info); + + /* + * By using the interrupt remapper in the hypervisor IOMMU, contiguous + * CPU vectors is not needed for multi-MSI + */ + if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) + info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; + + return ret; +} + /** * hv_irq_unmask() - "Unmask" the IRQ by setting its current * affinity. @@ -1195,6 +1221,7 @@ static void hv_irq_unmask(struct irq_data *data) struct msi_desc *msi_desc = irq_data_get_msi_desc(data); struct irq_cfg *cfg = irqd_cfg(data); struct hv_retarget_device_interrupt *params; + struct tran_int_desc *int_desc; struct hv_pcibus_device *hbus; struct cpumask *dest; cpumask_var_t tmp; @@ -1209,6 +1236,7 @@ static void hv_irq_unmask(struct irq_data *data) pdev = msi_desc_to_pci_dev(msi_desc); pbus = pdev->bus; hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); + int_desc = data->chip_data; spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags); @@ -1216,7 +1244,8 @@ static void hv_irq_unmask(struct irq_data *data) memset(params, 0, sizeof(*params)); params->partition_id = HV_PARTITION_ID_SELF; params->int_entry.source = 1; /* MSI(-X) */ - hv_set_msi_entry_from_desc(¶ms->int_entry.msi_entry, msi_desc); + params->int_entry.msi_entry.address = int_desc->address & 0xffffffff; + params->int_entry.msi_entry.data = int_desc->data; params->device_id = (hbus->hdev->dev_instance.b[5] << 24) | (hbus->hdev->dev_instance.b[4] << 16) | (hbus->hdev->dev_instance.b[7] << 8) | @@ -1317,12 +1346,12 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp, static u32 hv_compose_msi_req_v1( struct pci_create_interrupt *int_pkt, struct cpumask *affinity, - u32 slot, u8 vector) + u32 slot, u8 vector, u8 vector_count) { int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; - int_pkt->int_desc.vector_count = 1; + int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = dest_Fixed; /* @@ -1336,14 +1365,14 @@ static u32 hv_compose_msi_req_v1( static u32 hv_compose_msi_req_v2( struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity, - u32 slot, u8 vector) + u32 slot, u8 vector, u8 vector_count) { int cpu; int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; - int_pkt->int_desc.vector_count = 1; + int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = dest_Fixed; /* @@ -1371,7 +1400,6 @@ static u32 hv_compose_msi_req_v2( */ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { - struct irq_cfg *cfg = irqd_cfg(data); struct hv_pcibus_device *hbus; struct vmbus_channel *channel; struct hv_pci_dev *hpdev; @@ -1380,6 +1408,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct cpumask *dest; struct compose_comp_ctxt comp; struct tran_int_desc *int_desc; + struct msi_desc *msi_desc; + u8 vector, vector_count; struct { struct pci_packet pci_pkt; union { @@ -1391,7 +1421,17 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) u32 size; int ret; - pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data)); + /* Reuse the previous allocation */ + if (data->chip_data) { + int_desc = data->chip_data; + msg->address_hi = int_desc->address >> 32; + msg->address_lo = int_desc->address & 0xffffffff; + msg->data = int_desc->data; + return; + } + + msi_desc = irq_data_get_msi_desc(data); + pdev = msi_desc_to_pci_dev(msi_desc); dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); @@ -1400,17 +1440,40 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (!hpdev) goto return_null_message; - /* Free any previous message that might have already been composed. */ - if (data->chip_data) { - int_desc = data->chip_data; - data->chip_data = NULL; - hv_int_desc_free(hpdev, int_desc); - } - int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; + if (!msi_desc->msi_attrib.is_msix && msi_desc->nvec_used > 1) { + /* + * If this is not the first MSI of Multi MSI, we already have + * a mapping. Can exit early. + */ + if (msi_desc->irq != data->irq) { + data->chip_data = int_desc; + int_desc->address = msi_desc->msg.address_lo | + (u64)msi_desc->msg.address_hi << 32; + int_desc->data = msi_desc->msg.data + + (data->irq - msi_desc->irq); + msg->address_hi = msi_desc->msg.address_hi; + msg->address_lo = msi_desc->msg.address_lo; + msg->data = int_desc->data; + put_pcichild(hpdev); + return; + } + /* + * The vector we select here is a dummy value. The correct + * value gets sent to the hypervisor in unmask(). This needs + * to be aligned with the count, and also not zero. Multi-msi + * is powers of 2 up to 32, so 32 will always work here. + */ + vector = 32; + vector_count = msi_desc->nvec_used; + } else { + vector = hv_msi_get_int_vector(data); + vector_count = 1; + } + memset(&ctxt, 0, sizeof(ctxt)); init_completion(&comp.comp_pkt.host_event); ctxt.pci_pkt.completion_func = hv_pci_compose_compl; @@ -1421,7 +1484,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, dest, hpdev->desc.win_slot.slot, - cfg->vector); + vector, + vector_count); break; case PCI_PROTOCOL_VERSION_1_2: @@ -1429,7 +1493,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2, dest, hpdev->desc.win_slot.slot, - cfg->vector); + vector, + vector_count); break; default: @@ -1545,7 +1610,7 @@ static struct irq_chip hv_msi_irq_chip = { }; static struct msi_domain_ops hv_msi_ops = { - .msi_prepare = pci_msi_prepare, + .msi_prepare = hv_msi_prepare, .msi_free = hv_msi_free, }; diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 7631dc3961c1..379cde59988c 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -264,8 +264,7 @@ static int rockchip_pcie_ep_map_addr(struct pci_epc *epc, u8 fn, struct rockchip_pcie *pcie = &ep->rockchip; u32 r; - r = find_first_zero_bit(&ep->ob_region_map, - sizeof(ep->ob_region_map) * BITS_PER_LONG); + r = find_first_zero_bit(&ep->ob_region_map, BITS_PER_LONG); /* * Region 0 is reserved for configuration space and shouldn't * be used elsewhere per TRM, so leave it out. diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9028493dd5c9..c909cc72f801 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2837,6 +2837,18 @@ static const struct dmi_system_id bridge_d3_blacklist[] = { DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"), }, }, + { + /* + * Downstream device is not accessible after putting a root port + * into D3cold and back into D0 on Elo i2. + */ + .ident = "Elo i2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"), + DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"), + DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"), + }, + }, #endif { } }; @@ -4972,18 +4984,18 @@ static int pci_dev_reset_slot_function(struct pci_dev *dev, int probe) static void pci_dev_lock(struct pci_dev *dev) { - pci_cfg_access_lock(dev); /* block PM suspend, driver probe, etc. */ device_lock(&dev->dev); + pci_cfg_access_lock(dev); } /* Return 1 on successful lock, 0 on contention */ static int pci_dev_trylock(struct pci_dev *dev) { - if (pci_cfg_access_trylock(dev)) { - if (device_trylock(&dev->dev)) + if (device_trylock(&dev->dev)) { + if (pci_cfg_access_trylock(dev)) return 1; - pci_cfg_access_unlock(dev); + device_unlock(&dev->dev); } return 0; @@ -4991,8 +5003,8 @@ static int pci_dev_trylock(struct pci_dev *dev) static void pci_dev_unlock(struct pci_dev *dev) { - device_unlock(&dev->dev); pci_cfg_access_unlock(dev); + device_unlock(&dev->dev); } static void pci_dev_save_and_disable(struct pci_dev *dev) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 197075f71fd6..66102ceb9f3f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -591,11 +591,8 @@ static inline void pcie_ecrc_get_policy(char *str) { } #ifdef CONFIG_PCIE_PTM void pci_ptm_init(struct pci_dev *dev); -int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); #else static inline void pci_ptm_init(struct pci_dev *dev) { } -static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity) -{ return -EINVAL; } #endif struct pci_dev_reset_methods { diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 65dff5f3457a..c40546eeecb3 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -101,6 +101,11 @@ struct aer_stats { #define ERR_COR_ID(d) (d & 0xffff) #define ERR_UNCOR_ID(d) (d >> 16) +#define AER_ERR_STATUS_MASK (PCI_ERR_ROOT_UNCOR_RCV | \ + PCI_ERR_ROOT_COR_RCV | \ + PCI_ERR_ROOT_MULTI_COR_RCV | \ + PCI_ERR_ROOT_MULTI_UNCOR_RCV) + static int pcie_aer_disable; static pci_ers_result_t aer_root_reset(struct pci_dev *dev); @@ -1187,7 +1192,7 @@ static irqreturn_t aer_irq(int irq, void *context) struct aer_err_source e_src = {}; pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status); - if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) + if (!(e_src.status & AER_ERR_STATUS_MASK)) return IRQ_NONE; pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id); diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index 82d10b6661c7..73508fca520c 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -151,7 +151,7 @@ config TCIC config PCMCIA_ALCHEMY_DEVBOARD tristate "Alchemy Db/Pb1xxx PCMCIA socket services" - depends on MIPS_ALCHEMY && PCMCIA + depends on MIPS_DB1XXX && PCMCIA help Enable this driver of you want PCMCIA support on your Alchemy Db1000, Db/Pb1100, Db/Pb1500, Db/Pb1550, Db/Pb1200, DB1300 diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c index 0cda16846962..afcc82ab3202 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp.c @@ -3141,7 +3141,7 @@ static int qcom_qmp_phy_power_on(struct phy *phy) ret = reset_control_deassert(qmp->ufs_reset); if (ret) - goto err_lane_rst; + goto err_pcs_ready; qcom_qmp_phy_configure(pcs_misc, cfg->regs, cfg->pcs_misc_tbl, cfg->pcs_misc_tbl_num); @@ -3717,6 +3717,11 @@ static const struct phy_ops qcom_qmp_pcie_ufs_ops = { .owner = THIS_MODULE, }; +static void qcom_qmp_reset_control_put(void *data) +{ + reset_control_put(data); +} + static int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, void __iomem *serdes, const struct qmp_phy_cfg *cfg) @@ -3789,7 +3794,7 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, * all phys that don't need this. */ snprintf(prop_name, sizeof(prop_name), "pipe%d", id); - qphy->pipe_clk = of_clk_get_by_name(np, prop_name); + qphy->pipe_clk = devm_get_clk_from_child(dev, np, prop_name); if (IS_ERR(qphy->pipe_clk)) { if (cfg->type == PHY_TYPE_PCIE || cfg->type == PHY_TYPE_USB3) { @@ -3811,6 +3816,10 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, dev_err(dev, "failed to get lane%d reset\n", id); return PTR_ERR(qphy->lane_rst); } + ret = devm_add_action_or_reset(dev, qcom_qmp_reset_control_put, + qphy->lane_rst); + if (ret) + return ret; } if (cfg->type == PHY_TYPE_UFS || cfg->type == PHY_TYPE_PCIE) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 5c1a109842a7..c2ba4064ce5b 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -1224,18 +1224,12 @@ FUNC_GROUP_DECL(SALT8, AA12); FUNC_GROUP_DECL(WDTRST4, AA12); #define AE12 196 -SIG_EXPR_LIST_DECL_SEMG(AE12, FWSPIDQ2, FWQSPID, FWSPID, - SIG_DESC_SET(SCU438, 4)); SIG_EXPR_LIST_DECL_SESG(AE12, GPIOY4, GPIOY4); -PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIDQ2), - SIG_EXPR_LIST_PTR(AE12, GPIOY4)); +PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, GPIOY4)); #define AF12 197 -SIG_EXPR_LIST_DECL_SEMG(AF12, FWSPIDQ3, FWQSPID, FWSPID, - SIG_DESC_SET(SCU438, 5)); SIG_EXPR_LIST_DECL_SESG(AF12, GPIOY5, GPIOY5); -PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIDQ3), - SIG_EXPR_LIST_PTR(AF12, GPIOY5)); +PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, GPIOY5)); #define AC12 198 SSSF_PIN_DECL(AC12, GPIOY6, FWSPIABR, SIG_DESC_SET(SCU438, 6)); @@ -1508,9 +1502,8 @@ SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3)); PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7); GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4); -GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12); GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4); -FUNC_DECL_2(FWSPID, FWSPID, FWQSPID); +FUNC_DECL_1(FWSPID, FWSPID); FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4); FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8); /* @@ -1906,7 +1899,6 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = { ASPEED_PINCTRL_GROUP(FSI2), ASPEED_PINCTRL_GROUP(FWSPIABR), ASPEED_PINCTRL_GROUP(FWSPID), - ASPEED_PINCTRL_GROUP(FWQSPID), ASPEED_PINCTRL_GROUP(FWSPIWP), ASPEED_PINCTRL_GROUP(GPIT0), ASPEED_PINCTRL_GROUP(GPIT1), diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index 9c65d560d48f..e792318c3894 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -235,11 +235,11 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, const struct aspeed_sig_expr **funcs; const struct aspeed_sig_expr ***prios; - pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name); - if (!pdesc) return -EINVAL; + pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name); + prios = pdesc->prios; if (!prios) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 5cb018f98800..85a0052bb0e6 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -781,7 +781,7 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, for (i = 0; i < nr_irq_parent; i++) { int irq = irq_of_parse_and_map(np, i); - if (irq < 0) + if (!irq) continue; girq->parents[i] = irq; } diff --git a/drivers/pinctrl/renesas/core.c b/drivers/pinctrl/renesas/core.c index 258972672eda..54f1a7334027 100644 --- a/drivers/pinctrl/renesas/core.c +++ b/drivers/pinctrl/renesas/core.c @@ -71,12 +71,11 @@ static int sh_pfc_map_resources(struct sh_pfc *pfc, /* Fill them. */ for (i = 0; i < num_windows; i++) { - res = platform_get_resource(pdev, IORESOURCE_MEM, i); - windows->phys = res->start; - windows->size = resource_size(res); - windows->virt = devm_ioremap_resource(pfc->dev, res); + windows->virt = devm_platform_get_and_ioremap_resource(pdev, i, &res); if (IS_ERR(windows->virt)) return -ENOMEM; + windows->phys = res->start; + windows->size = resource_size(res); windows++; } for (i = 0; i < num_irqs; i++) diff --git a/drivers/pinctrl/renesas/pinctrl-rzn1.c b/drivers/pinctrl/renesas/pinctrl-rzn1.c index ef5fb25b6016..849d091205d4 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzn1.c +++ b/drivers/pinctrl/renesas/pinctrl-rzn1.c @@ -865,17 +865,15 @@ static int rzn1_pinctrl_probe(struct platform_device *pdev) ipctl->mdio_func[0] = -1; ipctl->mdio_func[1] = -1; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ipctl->lev1_protect_phys = (u32)res->start + 0x400; - ipctl->lev1 = devm_ioremap_resource(&pdev->dev, res); + ipctl->lev1 = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(ipctl->lev1)) return PTR_ERR(ipctl->lev1); + ipctl->lev1_protect_phys = (u32)res->start + 0x400; - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - ipctl->lev2_protect_phys = (u32)res->start + 0x400; - ipctl->lev2 = devm_ioremap_resource(&pdev->dev, res); + ipctl->lev2 = devm_platform_get_and_ioremap_resource(pdev, 1, &res); if (IS_ERR(ipctl->lev2)) return PTR_ERR(ipctl->lev2); + ipctl->lev2_protect_phys = (u32)res->start + 0x400; ipctl->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(ipctl->clk)) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index b017dd400c46..60406f1f8337 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1303,15 +1303,17 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, bank->bank_ioport_nr = bank_ioport_nr; spin_lock_init(&bank->lock); - /* create irq hierarchical domain */ - bank->fwnode = of_node_to_fwnode(np); + if (pctl->domain) { + /* create irq hierarchical domain */ + bank->fwnode = of_node_to_fwnode(np); - bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, - STM32_GPIO_IRQ_LINE, bank->fwnode, - &stm32_gpio_domain_ops, bank); + bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, STM32_GPIO_IRQ_LINE, + bank->fwnode, &stm32_gpio_domain_ops, + bank); - if (!bank->domain) - return -ENODEV; + if (!bank->domain) + return -ENODEV; + } err = gpiochip_add_data(&bank->gpio_chip, bank); if (err) { @@ -1481,6 +1483,8 @@ int stm32_pctl_probe(struct platform_device *pdev) pctl->domain = stm32_pctrl_get_irq_domain(np); if (IS_ERR(pctl->domain)) return PTR_ERR(pctl->domain); + if (!pctl->domain) + dev_warn(dev, "pinctrl without interrupt support\n"); /* hwspinlock is optional */ hwlock_id = of_hwspin_lock_get_id(pdev->dev.of_node, 0); diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c index 4ada80317a3b..b5c1a8f363f3 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c @@ -158,26 +158,26 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 14), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand"), /* DQ6 */ + SUNXI_FUNCTION(0x2, "nand0"), /* DQ6 */ SUNXI_FUNCTION(0x3, "mmc2")), /* D6 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 15), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand"), /* DQ7 */ + SUNXI_FUNCTION(0x2, "nand0"), /* DQ7 */ SUNXI_FUNCTION(0x3, "mmc2")), /* D7 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 16), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand"), /* DQS */ + SUNXI_FUNCTION(0x2, "nand0"), /* DQS */ SUNXI_FUNCTION(0x3, "mmc2")), /* RST */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 17), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand")), /* CE2 */ + SUNXI_FUNCTION(0x2, "nand0")), /* CE2 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 18), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x2, "nand")), /* CE3 */ + SUNXI_FUNCTION(0x2, "nand0")), /* CE3 */ /* Hole */ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 2), SUNXI_FUNCTION(0x0, "gpio_in"), diff --git a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c index 2801ca706273..68a5b627fb9b 100644 --- a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c +++ b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c @@ -204,7 +204,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = { SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x2, "lcd"), /* D20 */ - SUNXI_FUNCTION(0x3, "lvds1"), /* RX */ + SUNXI_FUNCTION(0x3, "uart2"), /* RX */ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)), SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 15), SUNXI_FUNCTION(0x0, "gpio_in"), diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index be7f4f95f455..24c861434bf1 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -544,6 +544,8 @@ static int sunxi_pconf_set(struct pinctrl_dev *pctldev, unsigned pin, struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); int i; + pin -= pctl->desc->pin_base; + for (i = 0; i < num_configs; i++) { enum pin_config_param param; unsigned long flags; diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c index 3104680b7485..979f92194e81 100644 --- a/drivers/platform/chrome/cros_ec.c +++ b/drivers/platform/chrome/cros_ec.c @@ -175,6 +175,8 @@ int cros_ec_register(struct cros_ec_device *ec_dev) ec_dev->max_request = sizeof(struct ec_params_hello); ec_dev->max_response = sizeof(struct ec_response_get_protocol_info); ec_dev->max_passthru = 0; + ec_dev->ec = NULL; + ec_dev->pd = NULL; ec_dev->din = devm_kzalloc(dev, ec_dev->din_size, GFP_KERNEL); if (!ec_dev->din) @@ -231,18 +233,16 @@ int cros_ec_register(struct cros_ec_device *ec_dev) if (IS_ERR(ec_dev->pd)) { dev_err(ec_dev->dev, "Failed to create CrOS PD platform device\n"); - platform_device_unregister(ec_dev->ec); - return PTR_ERR(ec_dev->pd); + err = PTR_ERR(ec_dev->pd); + goto exit; } } if (IS_ENABLED(CONFIG_OF) && dev->of_node) { err = devm_of_platform_populate(dev); if (err) { - platform_device_unregister(ec_dev->pd); - platform_device_unregister(ec_dev->ec); dev_err(dev, "Failed to register sub-devices\n"); - return err; + goto exit; } } @@ -264,12 +264,16 @@ int cros_ec_register(struct cros_ec_device *ec_dev) err = blocking_notifier_chain_register(&ec_dev->event_notifier, &ec_dev->notifier_ready); if (err) - return err; + goto exit; } dev_info(dev, "Chrome EC device registered\n"); return 0; +exit: + platform_device_unregister(ec_dev->ec); + platform_device_unregister(ec_dev->pd); + return err; } EXPORT_SYMBOL(cros_ec_register); diff --git a/drivers/platform/chrome/cros_ec_chardev.c b/drivers/platform/chrome/cros_ec_chardev.c index e0bce869c49a..fd33de546aee 100644 --- a/drivers/platform/chrome/cros_ec_chardev.c +++ b/drivers/platform/chrome/cros_ec_chardev.c @@ -301,7 +301,7 @@ static long cros_ec_chardev_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg) } s_cmd->command += ec->cmd_offset; - ret = cros_ec_cmd_xfer_status(ec->ec_dev, s_cmd); + ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd); /* Only copy data to userland if data was received. */ if (ret < 0) goto exit; diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index 272c89837d74..0dbceee87a4b 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -25,6 +25,9 @@ #define CIRC_ADD(idx, size, value) (((idx) + (value)) & ((size) - 1)) +/* waitqueue for log readers */ +static DECLARE_WAIT_QUEUE_HEAD(cros_ec_debugfs_log_wq); + /** * struct cros_ec_debugfs - EC debugging information. * @@ -33,7 +36,6 @@ * @log_buffer: circular buffer for console log information * @read_msg: preallocated EC command and buffer to read console log * @log_mutex: mutex to protect circular buffer - * @log_wq: waitqueue for log readers * @log_poll_work: recurring task to poll EC for new console log data * @panicinfo_blob: panicinfo debugfs blob */ @@ -44,7 +46,6 @@ struct cros_ec_debugfs { struct circ_buf log_buffer; struct cros_ec_command *read_msg; struct mutex log_mutex; - wait_queue_head_t log_wq; struct delayed_work log_poll_work; /* EC panicinfo */ struct debugfs_blob_wrapper panicinfo_blob; @@ -107,7 +108,7 @@ static void cros_ec_console_log_work(struct work_struct *__work) buf_space--; } - wake_up(&debug_info->log_wq); + wake_up(&cros_ec_debugfs_log_wq); } mutex_unlock(&debug_info->log_mutex); @@ -141,7 +142,7 @@ static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf, mutex_unlock(&debug_info->log_mutex); - ret = wait_event_interruptible(debug_info->log_wq, + ret = wait_event_interruptible(cros_ec_debugfs_log_wq, CIRC_CNT(cb->head, cb->tail, LOG_SIZE)); if (ret < 0) return ret; @@ -173,7 +174,7 @@ static __poll_t cros_ec_console_log_poll(struct file *file, struct cros_ec_debugfs *debug_info = file->private_data; __poll_t mask = 0; - poll_wait(file, &debug_info->log_wq, wait); + poll_wait(file, &cros_ec_debugfs_log_wq, wait); mutex_lock(&debug_info->log_mutex); if (CIRC_CNT(debug_info->log_buffer.head, @@ -377,7 +378,6 @@ static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info) debug_info->log_buffer.tail = 0; mutex_init(&debug_info->log_mutex); - init_waitqueue_head(&debug_info->log_wq); debugfs_create_file("console_log", S_IFREG | 0444, debug_info->dir, debug_info, &cros_ec_console_log_fops); diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 9f698a7aad12..e1fadf059e05 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -560,22 +560,28 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev) EXPORT_SYMBOL(cros_ec_query_all); /** - * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC. + * cros_ec_cmd_xfer() - Send a command to the ChromeOS EC. * @ec_dev: EC device. * @msg: Message to write. * - * Call this to send a command to the ChromeOS EC. This should be used instead of calling the EC's - * cmd_xfer() callback directly. It returns success status only if both the command was transmitted - * successfully and the EC replied with success status. + * Call this to send a command to the ChromeOS EC. This should be used instead + * of calling the EC's cmd_xfer() callback directly. This function does not + * convert EC command execution error codes to Linux error codes. Most + * in-kernel users will want to use cros_ec_cmd_xfer_status() instead since + * that function implements the conversion. * * Return: - * >=0 - The number of bytes transferred - * <0 - Linux error code + * >0 - EC command was executed successfully. The return value is the number + * of bytes returned by the EC (excluding the header). + * =0 - EC communication was successful. EC command execution results are + * reported in msg->result. The result will be EC_RES_SUCCESS if the + * command was executed successfully or report an EC command execution + * error. + * <0 - EC communication error. Return value is the Linux error code. */ -int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, - struct cros_ec_command *msg) +int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, struct cros_ec_command *msg) { - int ret, mapped; + int ret; mutex_lock(&ec_dev->lock); if (ec_dev->proto_version == EC_PROTO_VERSION_UNKNOWN) { @@ -616,6 +622,32 @@ int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, ret = send_command(ec_dev, msg); mutex_unlock(&ec_dev->lock); + return ret; +} +EXPORT_SYMBOL(cros_ec_cmd_xfer); + +/** + * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC. + * @ec_dev: EC device. + * @msg: Message to write. + * + * Call this to send a command to the ChromeOS EC. This should be used instead of calling the EC's + * cmd_xfer() callback directly. It returns success status only if both the command was transmitted + * successfully and the EC replied with success status. + * + * Return: + * >=0 - The number of bytes transferred. + * <0 - Linux error code + */ +int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg) +{ + int ret, mapped; + + ret = cros_ec_cmd_xfer(ec_dev, msg); + if (ret < 0) + return ret; + mapped = cros_ec_map_error(msg->result); if (mapped) { dev_dbg(ec_dev->dev, "Command result (err: %d [%d])\n", diff --git a/drivers/platform/mips/Kconfig b/drivers/platform/mips/Kconfig index 8ac149173c64..495da331ca2d 100644 --- a/drivers/platform/mips/Kconfig +++ b/drivers/platform/mips/Kconfig @@ -17,7 +17,7 @@ menuconfig MIPS_PLATFORM_DEVICES if MIPS_PLATFORM_DEVICES config CPU_HWMON - tristate "Loongson-3 CPU HWMon Driver" + bool "Loongson-3 CPU HWMon Driver" depends on MACH_LOONGSON64 select HWMON default y diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c index 386389ffec41..d8c5f9195f85 100644 --- a/drivers/platform/mips/cpu_hwmon.c +++ b/drivers/platform/mips/cpu_hwmon.c @@ -55,55 +55,6 @@ int loongson3_cpu_temp(int cpu) static int nr_packages; static struct device *cpu_hwmon_dev; -static SENSOR_DEVICE_ATTR(name, 0444, NULL, NULL, 0); - -static struct attribute *cpu_hwmon_attributes[] = { - &sensor_dev_attr_name.dev_attr.attr, - NULL -}; - -/* Hwmon device attribute group */ -static struct attribute_group cpu_hwmon_attribute_group = { - .attrs = cpu_hwmon_attributes, -}; - -static ssize_t get_cpu_temp(struct device *dev, - struct device_attribute *attr, char *buf); -static ssize_t cpu_temp_label(struct device *dev, - struct device_attribute *attr, char *buf); - -static SENSOR_DEVICE_ATTR(temp1_input, 0444, get_cpu_temp, NULL, 1); -static SENSOR_DEVICE_ATTR(temp1_label, 0444, cpu_temp_label, NULL, 1); -static SENSOR_DEVICE_ATTR(temp2_input, 0444, get_cpu_temp, NULL, 2); -static SENSOR_DEVICE_ATTR(temp2_label, 0444, cpu_temp_label, NULL, 2); -static SENSOR_DEVICE_ATTR(temp3_input, 0444, get_cpu_temp, NULL, 3); -static SENSOR_DEVICE_ATTR(temp3_label, 0444, cpu_temp_label, NULL, 3); -static SENSOR_DEVICE_ATTR(temp4_input, 0444, get_cpu_temp, NULL, 4); -static SENSOR_DEVICE_ATTR(temp4_label, 0444, cpu_temp_label, NULL, 4); - -static const struct attribute *hwmon_cputemp[4][3] = { - { - &sensor_dev_attr_temp1_input.dev_attr.attr, - &sensor_dev_attr_temp1_label.dev_attr.attr, - NULL - }, - { - &sensor_dev_attr_temp2_input.dev_attr.attr, - &sensor_dev_attr_temp2_label.dev_attr.attr, - NULL - }, - { - &sensor_dev_attr_temp3_input.dev_attr.attr, - &sensor_dev_attr_temp3_label.dev_attr.attr, - NULL - }, - { - &sensor_dev_attr_temp4_input.dev_attr.attr, - &sensor_dev_attr_temp4_label.dev_attr.attr, - NULL - } -}; - static ssize_t cpu_temp_label(struct device *dev, struct device_attribute *attr, char *buf) { @@ -121,23 +72,46 @@ static ssize_t get_cpu_temp(struct device *dev, return sprintf(buf, "%d\n", value); } -static int create_sysfs_cputemp_files(struct kobject *kobj) +static SENSOR_DEVICE_ATTR(temp1_input, 0444, get_cpu_temp, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_label, 0444, cpu_temp_label, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_input, 0444, get_cpu_temp, NULL, 2); +static SENSOR_DEVICE_ATTR(temp2_label, 0444, cpu_temp_label, NULL, 2); +static SENSOR_DEVICE_ATTR(temp3_input, 0444, get_cpu_temp, NULL, 3); +static SENSOR_DEVICE_ATTR(temp3_label, 0444, cpu_temp_label, NULL, 3); +static SENSOR_DEVICE_ATTR(temp4_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp4_label, 0444, cpu_temp_label, NULL, 4); + +static struct attribute *cpu_hwmon_attributes[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, + &sensor_dev_attr_temp4_input.dev_attr.attr, + &sensor_dev_attr_temp4_label.dev_attr.attr, + NULL +}; + +static umode_t cpu_hwmon_is_visible(struct kobject *kobj, + struct attribute *attr, int i) { - int i, ret = 0; + int id = i / 2; - for (i = 0; i < nr_packages; i++) - ret = sysfs_create_files(kobj, hwmon_cputemp[i]); - - return ret; + if (id < nr_packages) + return attr->mode; + return 0; } -static void remove_sysfs_cputemp_files(struct kobject *kobj) -{ - int i; +static struct attribute_group cpu_hwmon_group = { + .attrs = cpu_hwmon_attributes, + .is_visible = cpu_hwmon_is_visible, +}; - for (i = 0; i < nr_packages; i++) - sysfs_remove_files(kobj, hwmon_cputemp[i]); -} +static const struct attribute_group *cpu_hwmon_groups[] = { + &cpu_hwmon_group, + NULL +}; #define CPU_THERMAL_THRESHOLD 90000 static struct delayed_work thermal_work; @@ -159,50 +133,31 @@ static void do_thermal_timer(struct work_struct *work) static int __init loongson_hwmon_init(void) { - int ret; - pr_info("Loongson Hwmon Enter...\n"); if (cpu_has_csr()) csr_temp_enable = csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_TEMP; - cpu_hwmon_dev = hwmon_device_register_with_info(NULL, "cpu_hwmon", NULL, NULL, NULL); - if (IS_ERR(cpu_hwmon_dev)) { - ret = PTR_ERR(cpu_hwmon_dev); - pr_err("hwmon_device_register fail!\n"); - goto fail_hwmon_device_register; - } - nr_packages = loongson_sysconf.nr_cpus / loongson_sysconf.cores_per_package; - ret = create_sysfs_cputemp_files(&cpu_hwmon_dev->kobj); - if (ret) { - pr_err("fail to create cpu temperature interface!\n"); - goto fail_create_sysfs_cputemp_files; + cpu_hwmon_dev = hwmon_device_register_with_groups(NULL, "cpu_hwmon", + NULL, cpu_hwmon_groups); + if (IS_ERR(cpu_hwmon_dev)) { + pr_err("hwmon_device_register fail!\n"); + return PTR_ERR(cpu_hwmon_dev); } INIT_DEFERRABLE_WORK(&thermal_work, do_thermal_timer); schedule_delayed_work(&thermal_work, msecs_to_jiffies(20000)); - return ret; - -fail_create_sysfs_cputemp_files: - sysfs_remove_group(&cpu_hwmon_dev->kobj, - &cpu_hwmon_attribute_group); - hwmon_device_unregister(cpu_hwmon_dev); - -fail_hwmon_device_register: - return ret; + return 0; } static void __exit loongson_hwmon_exit(void) { cancel_delayed_work_sync(&thermal_work); - remove_sysfs_cputemp_files(&cpu_hwmon_dev->kobj); - sysfs_remove_group(&cpu_hwmon_dev->kobj, - &cpu_hwmon_attribute_group); hwmon_device_unregister(cpu_hwmon_dev); } diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index e94e59283ecb..012639f6d335 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -62,6 +62,7 @@ enum hp_wmi_event_ids { HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D, HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, + HPWMI_SANITIZATION_MODE = 0x17, }; struct bios_args { @@ -629,6 +630,8 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_BATTERY_CHARGE_PERIOD: break; + case HPWMI_SANITIZATION_MODE: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; diff --git a/drivers/power/reset/arm-versatile-reboot.c b/drivers/power/reset/arm-versatile-reboot.c index 08d0a07b58ef..c7624d7611a7 100644 --- a/drivers/power/reset/arm-versatile-reboot.c +++ b/drivers/power/reset/arm-versatile-reboot.c @@ -146,6 +146,7 @@ static int __init versatile_reboot_probe(void) versatile_reboot_type = (enum versatile_reboot)reboot_id->data; syscon_regmap = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(syscon_regmap)) return PTR_ERR(syscon_regmap); diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c index bf3f14fb5f24..05e4120fd702 100644 --- a/drivers/pwm/pwm-lp3943.c +++ b/drivers/pwm/pwm-lp3943.c @@ -125,6 +125,7 @@ static int lp3943_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, if (err) return err; + duty_ns = min(duty_ns, period_ns); val = (u8)(duty_ns * LP3943_MAX_DUTY / period_ns); return lp3943_write_byte(lp3943, reg_duty, val); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 2c48e55c4104..6e3f3511e7dd 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2027,10 +2027,13 @@ struct regulator *_regulator_get(struct device *dev, const char *id, rdev->exclusive = 1; ret = _regulator_is_enabled(rdev); - if (ret > 0) + if (ret > 0) { rdev->use_count = 1; - else + regulator->enable_count = 1; + } else { rdev->use_count = 0; + regulator->enable_count = 0; + } } link = device_link_add(dev, &rdev->dev, DL_FLAG_STATELESS); diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 01a12cfcea7c..0a19500d3725 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -531,6 +531,7 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip) parent = of_get_child_by_name(np, "regulators"); if (!parent) { dev_err(dev, "regulators node not found\n"); + of_node_put(np); return -EINVAL; } @@ -560,6 +561,7 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip) } of_node_put(parent); + of_node_put(np); if (ret < 0) { dev_err(dev, "Error parsing regulator init data: %d\n", ret); diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index 8d784a2a09d8..05d227f9d2f2 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -844,32 +844,31 @@ static const struct rpm_regulator_data rpm_pm8950_regulators[] = { { "s2", QCOM_SMD_RPM_SMPA, 2, &pm8950_hfsmps, "vdd_s2" }, { "s3", QCOM_SMD_RPM_SMPA, 3, &pm8950_hfsmps, "vdd_s3" }, { "s4", QCOM_SMD_RPM_SMPA, 4, &pm8950_hfsmps, "vdd_s4" }, - { "s5", QCOM_SMD_RPM_SMPA, 5, &pm8950_ftsmps2p5, "vdd_s5" }, + /* S5 is managed via SPMI. */ { "s6", QCOM_SMD_RPM_SMPA, 6, &pm8950_hfsmps, "vdd_s6" }, { "l1", QCOM_SMD_RPM_LDOA, 1, &pm8950_ult_nldo, "vdd_l1_l19" }, { "l2", QCOM_SMD_RPM_LDOA, 2, &pm8950_ult_nldo, "vdd_l2_l23" }, { "l3", QCOM_SMD_RPM_LDOA, 3, &pm8950_ult_nldo, "vdd_l3" }, - { "l4", QCOM_SMD_RPM_LDOA, 4, &pm8950_ult_pldo, "vdd_l4_l5_l6_l7_l16" }, - { "l5", QCOM_SMD_RPM_LDOA, 5, &pm8950_pldo_lv, "vdd_l4_l5_l6_l7_l16" }, - { "l6", QCOM_SMD_RPM_LDOA, 6, &pm8950_pldo_lv, "vdd_l4_l5_l6_l7_l16" }, - { "l7", QCOM_SMD_RPM_LDOA, 7, &pm8950_pldo_lv, "vdd_l4_l5_l6_l7_l16" }, + /* L4 seems not to exist. */ + { "l5", QCOM_SMD_RPM_LDOA, 5, &pm8950_pldo_lv, "vdd_l5_l6_l7_l16" }, + { "l6", QCOM_SMD_RPM_LDOA, 6, &pm8950_pldo_lv, "vdd_l5_l6_l7_l16" }, + { "l7", QCOM_SMD_RPM_LDOA, 7, &pm8950_pldo_lv, "vdd_l5_l6_l7_l16" }, { "l8", QCOM_SMD_RPM_LDOA, 8, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" }, { "l9", QCOM_SMD_RPM_LDOA, 9, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" }, { "l10", QCOM_SMD_RPM_LDOA, 10, &pm8950_ult_nldo, "vdd_l9_l10_l13_l14_l15_l18"}, - { "l11", QCOM_SMD_RPM_LDOA, 11, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22"}, - { "l12", QCOM_SMD_RPM_LDOA, 12, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22"}, - { "l13", QCOM_SMD_RPM_LDOA, 13, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"}, - { "l14", QCOM_SMD_RPM_LDOA, 14, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"}, - { "l15", QCOM_SMD_RPM_LDOA, 15, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"}, - { "l16", QCOM_SMD_RPM_LDOA, 16, &pm8950_ult_pldo, "vdd_l4_l5_l6_l7_l16"}, - { "l17", QCOM_SMD_RPM_LDOA, 17, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22"}, - { "l18", QCOM_SMD_RPM_LDOA, 18, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18"}, - { "l19", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l1_l19"}, - { "l20", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l20"}, - { "l21", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l21"}, - { "l22", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l8_l11_l12_l17_l22"}, - { "l23", QCOM_SMD_RPM_LDOA, 18, &pm8950_pldo, "vdd_l2_l23"}, + { "l11", QCOM_SMD_RPM_LDOA, 11, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" }, + { "l12", QCOM_SMD_RPM_LDOA, 12, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" }, + { "l13", QCOM_SMD_RPM_LDOA, 13, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" }, + { "l14", QCOM_SMD_RPM_LDOA, 14, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" }, + { "l15", QCOM_SMD_RPM_LDOA, 15, &pm8950_ult_pldo, "vdd_l9_l10_l13_l14_l15_l18" }, + { "l16", QCOM_SMD_RPM_LDOA, 16, &pm8950_ult_pldo, "vdd_l5_l6_l7_l16" }, + { "l17", QCOM_SMD_RPM_LDOA, 17, &pm8950_ult_pldo, "vdd_l8_l11_l12_l17_l22" }, + /* L18 seems not to exist. */ + { "l19", QCOM_SMD_RPM_LDOA, 19, &pm8950_pldo, "vdd_l1_l19" }, + /* L20 & L21 seem not to exist. */ + { "l22", QCOM_SMD_RPM_LDOA, 22, &pm8950_pldo, "vdd_l8_l11_l12_l17_l22" }, + { "l23", QCOM_SMD_RPM_LDOA, 23, &pm8950_pldo, "vdd_l2_l23" }, {} }; diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index 19903de6268d..a4db9f6100d2 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -1388,9 +1388,9 @@ static int qcom_smd_parse_edge(struct device *dev, edge->name = node->name; irq = irq_of_parse_and_map(node, 0); - if (irq < 0) { + if (!irq) { dev_err(dev, "required smd interrupt missing\n"); - ret = irq; + ret = -EINVAL; goto put_node; } diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 7c88d190c51f..625effe6cb65 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -26,6 +26,15 @@ struct class *rtc_class; static void rtc_device_release(struct device *dev) { struct rtc_device *rtc = to_rtc_device(dev); + struct timerqueue_head *head = &rtc->timerqueue; + struct timerqueue_node *node; + + mutex_lock(&rtc->ops_lock); + while ((node = timerqueue_getnext(head))) + timerqueue_del(head, node); + mutex_unlock(&rtc->ops_lock); + + cancel_work_sync(&rtc->irqwork); ida_simple_remove(&rtc_ida, rtc->id); kfree(rtc); diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 5add637c9ad2..b036ff33fbe6 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -99,6 +99,17 @@ unsigned int mc146818_get_time(struct rtc_time *time) } EXPORT_SYMBOL_GPL(mc146818_get_time); +/* AMD systems don't allow access to AltCentury with DV1 */ +static bool apply_amd_register_a_behavior(void) +{ +#ifdef CONFIG_X86 + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + return true; +#endif + return false; +} + /* Set the current date and time in the real time clock. */ int mc146818_set_time(struct rtc_time *time) { @@ -172,7 +183,10 @@ int mc146818_set_time(struct rtc_time *time) save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + if (apply_amd_register_a_behavior()) + CMOS_WRITE((save_freq_select & ~RTC_AMD_BANK_SELECT), RTC_FREQ_SELECT); + else + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); #ifdef CONFIG_MACH_DECSTATION CMOS_WRITE(real_yrs, RTC_DEC_YEAR); diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index 1894aded4c85..acfcb378767d 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -269,6 +269,8 @@ static int mtk_rtc_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; rtc->addr_base = res->start; rtc->data = of_device_get_match_data(&pdev->dev); diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index f0a6861ff3ae..715513311ece 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -366,7 +366,8 @@ static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127) static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) { struct pcf2127 *pcf2127 = dev_get_drvdata(dev); - unsigned int buf[5], ctrl2; + u8 buf[5]; + unsigned int ctrl2; int ret; ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2); diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index f2818cdd11d8..52b36b7c6129 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -138,7 +138,7 @@ struct sun6i_rtc_dev { const struct sun6i_rtc_clk_data *data; void __iomem *base; int irq; - unsigned long alarm; + time64_t alarm; struct clk_hw hw; struct clk_hw *int_osc; @@ -510,10 +510,8 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm) struct sun6i_rtc_dev *chip = dev_get_drvdata(dev); struct rtc_time *alrm_tm = &wkalrm->time; struct rtc_time tm_now; - unsigned long time_now = 0; - unsigned long time_set = 0; - unsigned long time_gap = 0; - int ret = 0; + time64_t time_now, time_set; + int ret; ret = sun6i_rtc_gettime(dev, &tm_now); if (ret < 0) { @@ -528,9 +526,7 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm) return -EINVAL; } - time_gap = time_set - time_now; - - if (time_gap > U32_MAX) { + if ((time_set - time_now) > U32_MAX) { dev_err(dev, "Date too far in the future\n"); return -EINVAL; } @@ -539,7 +535,7 @@ static int sun6i_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm) writel(0, chip->base + SUN6I_ALRM_COUNTER); usleep_range(100, 300); - writel(time_gap, chip->base + SUN6I_ALRM_COUNTER); + writel(time_set - time_now, chip->base + SUN6I_ALRM_COUNTER); chip->alarm = time_set; sun6i_rtc_setaie(wkalrm->enabled, chip); diff --git a/drivers/s390/char/keyboard.h b/drivers/s390/char/keyboard.h index c467589c7f45..c06d399b9b1f 100644 --- a/drivers/s390/char/keyboard.h +++ b/drivers/s390/char/keyboard.h @@ -56,7 +56,7 @@ static inline void kbd_put_queue(struct tty_port *port, int ch) { tty_insert_flip_char(port, ch, 0); - tty_schedule_flip(port); + tty_flip_buffer_push(port); } static inline void @@ -64,5 +64,5 @@ kbd_puts_queue(struct tty_port *port, char *cp) { while (*cp) tty_insert_flip_char(port, *cp++, 0); - tty_schedule_flip(port); + tty_flip_buffer_push(port); } diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 6cb48ae8e124..d8967bf72ef2 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -3631,10 +3631,19 @@ static struct DeviceCtlBlk *device_alloc(struct AdapterCtlBlk *acb, #endif if (dcb->target_lun != 0) { /* Copy settings */ - struct DeviceCtlBlk *p; - list_for_each_entry(p, &acb->dcb_list, list) - if (p->target_id == dcb->target_id) + struct DeviceCtlBlk *p = NULL, *iter; + + list_for_each_entry(iter, &acb->dcb_list, list) + if (iter->target_id == dcb->target_id) { + p = iter; break; + } + + if (!p) { + kfree(dcb); + return NULL; + } + dprintkdbg(DBG_1, "device_alloc: <%02i-%i> copy from <%02i-%i>\n", dcb->target_id, dcb->target_lun, diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 5ea426effa60..bbc5d6b9be73 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -1969,7 +1969,7 @@ EXPORT_SYMBOL(fcoe_ctlr_recv_flogi); * * Returns: u64 fc world wide name */ -u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], +u64 fcoe_wwn_from_mac(unsigned char mac[ETH_ALEN], unsigned int scheme, unsigned int port) { u64 wwn; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index cd41dc061d87..dfe7e6370d84 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2738,6 +2738,7 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) struct hisi_hba *hisi_hba = shost_priv(shost); struct device *dev = hisi_hba->dev; int ret = sas_slave_configure(sdev); + unsigned int max_sectors; if (ret) return ret; @@ -2755,6 +2756,12 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) } } + /* Set according to IOMMU IOVA caching limit */ + max_sectors = min_t(size_t, queue_max_hw_sectors(sdev->request_queue), + (PAGE_SIZE * 32) >> SECTOR_SHIFT); + + blk_queue_max_hw_sectors(sdev->request_queue, max_sectors); + return 0; } diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index b0aa58d117cc..90e8a538b078 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9792,7 +9792,7 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) GFP_KERNEL); if (!ioa_cfg->hrrq[i].host_rrq) { - while (--i > 0) + while (--i >= 0) dma_free_coherent(&pdev->dev, sizeof(u32) * ioa_cfg->hrrq[i].size, ioa_cfg->hrrq[i].host_rrq, @@ -10065,7 +10065,7 @@ static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->vectors_info[i].desc, &ioa_cfg->hrrq[i]); if (rc) { - while (--i >= 0) + while (--i > 0) free_irq(pci_irq_vector(pdev, i), &ioa_cfg->hrrq[i]); return rc; diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 47e832b7f2c2..bfbc1c4fcab1 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -4281,6 +4281,9 @@ struct wqe_common { #define wqe_sup_SHIFT 6 #define wqe_sup_MASK 0x00000001 #define wqe_sup_WORD word11 +#define wqe_ffrq_SHIFT 6 +#define wqe_ffrq_MASK 0x00000001 +#define wqe_ffrq_WORD word11 #define wqe_wqec_SHIFT 7 #define wqe_wqec_MASK 0x00000001 #define wqe_wqec_WORD word11 diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index e33f752318c1..1e22364a31fc 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -857,7 +857,8 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_nvmet_invalidate_host(phba, ndlp); if (ndlp->nlp_DID == Fabric_DID) { - if (vport->port_state <= LPFC_FDISC) + if (vport->port_state <= LPFC_FDISC || + vport->fc_flag & FC_PT2PT) goto out; lpfc_linkdown_port(vport); spin_lock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 03c81cec6bc9..ef92e0b4b9cf 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1315,7 +1315,8 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, { struct lpfc_hba *phba = vport->phba; struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; - struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq); + struct nvme_common_command *sqe; + struct lpfc_iocbq *pwqeq = &lpfc_ncmd->cur_iocbq; union lpfc_wqe128 *wqe = &pwqeq->wqe; uint32_t req_len; @@ -1371,8 +1372,14 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, cstat->control_requests++; } - if (pnode->nlp_nvme_info & NLP_NVME_NSLER) + if (pnode->nlp_nvme_info & NLP_NVME_NSLER) { bf_set(wqe_erp, &wqe->generic.wqe_com, 1); + sqe = &((struct nvme_fc_cmd_iu *) + nCmd->cmdaddr)->sqe.common; + if (sqe->opcode == nvme_admin_async_event) + bf_set(wqe_ffrq, &wqe->generic.wqe_com, 1); + } + /* * Finish initializing those WQE fields that are independent * of the nvme_cmnd request_buffer diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index a50f870c5f72..755d68b98160 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -17445,7 +17445,6 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) case FC_RCTL_ELS_REP: /* extended link services reply */ case FC_RCTL_ELS4_REQ: /* FC-4 ELS request */ case FC_RCTL_ELS4_REP: /* FC-4 ELS reply */ - case FC_RCTL_BA_NOP: /* basic link service NOP */ case FC_RCTL_BA_ABTS: /* basic link service abort */ case FC_RCTL_BA_RMC: /* remove connection */ case FC_RCTL_BA_ACC: /* basic accept */ @@ -17466,6 +17465,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) fc_vft_hdr = (struct fc_vft_header *)fc_hdr; fc_hdr = &((struct fc_frame_header *)fc_vft_hdr)[1]; return lpfc_fc_frame_check(phba, fc_hdr); + case FC_RCTL_BA_NOP: /* basic link service NOP */ default: goto drop; } @@ -18284,12 +18284,14 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, if (!lpfc_complete_unsol_iocb(phba, phba->sli4_hba.els_wq->pring, iocbq, fc_hdr->fh_r_ctl, - fc_hdr->fh_type)) + fc_hdr->fh_type)) { lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "2540 Ring %d handler: unexpected Rctl " "x%x Type x%x received\n", LPFC_ELS_RING, fc_hdr->fh_r_ctl, fc_hdr->fh_type); + lpfc_in_buf_free(phba, &seq_dmabuf->dbuf); + } /* Free iocb created in lpfc_prep_seq */ list_for_each_entry_safe(curr_iocb, next_iocb, diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 80f546976c7e..daffa36988ae 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4634,7 +4634,7 @@ static int __init megaraid_init(void) * major number allocation. */ major = register_chrdev(0, "megadev_legacy", &megadev_fops); - if (!major) { + if (major < 0) { printk(KERN_WARNING "megaraid: failed to register char device\n"); } diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index 5fa0f4ed6565..ad17c2beaaca 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1241,7 +1241,8 @@ static void myrb_cleanup(struct myrb_hba *cb) myrb_unmap(cb); if (cb->mmio_base) { - cb->disable_intr(cb->io_base); + if (cb->disable_intr) + cb->disable_intr(cb->io_base); iounmap(cb->mmio_base); } if (cb->irq) @@ -3515,9 +3516,13 @@ static struct myrb_hba *myrb_detect(struct pci_dev *pdev, mutex_init(&cb->dcmd_mutex); mutex_init(&cb->dma_mutex); cb->pdev = pdev; + cb->host = shost; - if (pci_enable_device(pdev)) - goto failure; + if (pci_enable_device(pdev)) { + dev_err(&pdev->dev, "Failed to enable PCI device\n"); + scsi_host_put(shost); + return NULL; + } if (privdata->hw_init == DAC960_PD_hw_init || privdata->hw_init == DAC960_P_hw_init) { diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index cbe5fab793eb..ce10d680c56c 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -4528,7 +4528,7 @@ pmcraid_register_interrupt_handler(struct pmcraid_instance *pinstance) return 0; out_unwind: - while (--i > 0) + while (--i >= 0) free_irq(pci_irq_vector(pdev, i), &pinstance->hrrq_vector[i]); pci_free_irq_vectors(pdev); return rc; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index cf9ae0ab489a..ba823e8eb902 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3773,6 +3773,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&cmd->cmd_lock, flags); if (cmd->aborted) { + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); + spin_unlock_irqrestore(&cmd->cmd_lock, flags); /* * It's normal to see 2 calls in this path: diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 6b00de6b6f0e..5eb959b5f701 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2746,6 +2746,24 @@ static void zbc_open_zone(struct sdebug_dev_info *devip, } } +static inline void zbc_set_zone_full(struct sdebug_dev_info *devip, + struct sdeb_zone_state *zsp) +{ + switch (zsp->z_cond) { + case ZC2_IMPLICIT_OPEN: + devip->nr_imp_open--; + break; + case ZC3_EXPLICIT_OPEN: + devip->nr_exp_open--; + break; + default: + WARN_ONCE(true, "Invalid zone %llu condition %x\n", + zsp->z_start, zsp->z_cond); + break; + } + zsp->z_cond = ZC5_FULL; +} + static void zbc_inc_wp(struct sdebug_dev_info *devip, unsigned long long lba, unsigned int num) { @@ -2758,7 +2776,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, if (zsp->z_type == ZBC_ZONE_TYPE_SWR) { zsp->z_wp += num; if (zsp->z_wp >= zend) - zsp->z_cond = ZC5_FULL; + zbc_set_zone_full(devip, zsp); return; } @@ -2777,7 +2795,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, n = num; } if (zsp->z_wp >= zend) - zsp->z_cond = ZC5_FULL; + zbc_set_zone_full(devip, zsp); num -= n; lba += n; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 56e291708587..bd068d3bb455 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3511,7 +3511,6 @@ static int sd_probe(struct device *dev) out_put: put_disk(gd); out_free: - sd_zbc_release_disk(sdkp); kfree(sdkp); out: scsi_autopm_put_device(sdp); diff --git a/drivers/scsi/ufs/ti-j721e-ufs.c b/drivers/scsi/ufs/ti-j721e-ufs.c index eafe0db98d54..122d650d0810 100644 --- a/drivers/scsi/ufs/ti-j721e-ufs.c +++ b/drivers/scsi/ufs/ti-j721e-ufs.c @@ -29,11 +29,9 @@ static int ti_j721e_ufs_probe(struct platform_device *pdev) return PTR_ERR(regbase); pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); - if (ret < 0) { - pm_runtime_put_noidle(dev); + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) goto disable_pm; - } /* Select MPHY refclk frequency */ clk = devm_clk_get(dev, NULL); diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 9fe3b93440f6..707e4a7a1838 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -637,12 +637,7 @@ static int ufs_qcom_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) return err; } - err = ufs_qcom_ice_resume(host); - if (err) - return err; - - hba->is_sys_suspended = false; - return 0; + return ufs_qcom_ice_resume(host); } static void ufs_qcom_dev_ref_clk_ctrl(struct ufs_qcom_host *host, bool enable) @@ -683,8 +678,11 @@ static void ufs_qcom_dev_ref_clk_ctrl(struct ufs_qcom_host *host, bool enable) writel_relaxed(temp, host->dev_ref_clk_ctrl_mmio); - /* ensure that ref_clk is enabled/disabled before we return */ - wmb(); + /* + * Make sure the write to ref_clk reaches the destination and + * not stored in a Write Buffer (WB). + */ + readl(host->dev_ref_clk_ctrl_mmio); /* * If we call hibern8 exit after this, we need to make sure that diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index f13564a3e4d1..582ca374595c 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -107,9 +107,20 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba) return ret; } +static bool phandle_exists(const struct device_node *np, + const char *phandle_name, int index) +{ + struct device_node *parse_np = of_parse_phandle(np, phandle_name, index); + + if (parse_np) + of_node_put(parse_np); + + return parse_np != NULL; +} + #define MAX_PROP_SIZE 32 static int ufshcd_populate_vreg(struct device *dev, const char *name, - struct ufs_vreg **out_vreg) + struct ufs_vreg **out_vreg) { int ret = 0; char prop_name[MAX_PROP_SIZE]; @@ -122,7 +133,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, } snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name); - if (!of_parse_phandle(np, prop_name, 0)) { + if (!phandle_exists(np, prop_name, 0)) { dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n", __func__, prop_name); goto out; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index e83fbb4f0593..a611d05b644a 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -113,8 +113,13 @@ int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len, if (!regs) return -ENOMEM; - for (pos = 0; pos < len; pos += 4) + for (pos = 0; pos < len; pos += 4) { + if (offset == 0 && + pos >= REG_UIC_ERROR_CODE_PHY_ADAPTER_LAYER && + pos <= REG_UIC_ERROR_CODE_DME) + continue; regs[pos / 4] = ufshcd_readl(hba, offset + pos); + } ufshcd_hex_dump(prefix, regs, len); kfree(regs); diff --git a/drivers/scsi/vmw_pvscsi.h b/drivers/scsi/vmw_pvscsi.h index 75966d3f326e..d87c12324c03 100644 --- a/drivers/scsi/vmw_pvscsi.h +++ b/drivers/scsi/vmw_pvscsi.h @@ -333,8 +333,8 @@ struct PVSCSIRingReqDesc { u8 tag; u8 bus; u8 target; - u8 vcpuHint; - u8 unused[59]; + u16 vcpuHint; + u8 unused[58]; } __packed; /* diff --git a/drivers/soc/bcm/brcmstb/pm/pm-arm.c b/drivers/soc/bcm/brcmstb/pm/pm-arm.c index b1062334e608..c6ec7d95bcfc 100644 --- a/drivers/soc/bcm/brcmstb/pm/pm-arm.c +++ b/drivers/soc/bcm/brcmstb/pm/pm-arm.c @@ -780,6 +780,7 @@ static int brcmstb_pm_probe(struct platform_device *pdev) } ret = brcmstb_init_sram(dn); + of_node_put(dn); if (ret) { pr_err("error setting up SRAM for PM\n"); return ret; diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index 6065aaab6740..8482a4892b83 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++ b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -735,7 +735,7 @@ static const struct of_device_id ixp4xx_npe_of_match[] = { static struct platform_driver ixp4xx_npe_driver = { .driver = { .name = "ixp4xx-npe", - .of_match_table = of_match_ptr(ixp4xx_npe_of_match), + .of_match_table = ixp4xx_npe_of_match, }, .probe = ixp4xx_npe_probe, .remove = ixp4xx_npe_remove, diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c index 70fbe70c6213..2e06f48d683d 100644 --- a/drivers/soc/qcom/llcc-qcom.c +++ b/drivers/soc/qcom/llcc-qcom.c @@ -496,6 +496,7 @@ static const struct of_device_id qcom_llcc_of_match[] = { { .compatible = "qcom,sdm845-llcc", .data = &sdm845_cfg }, { } }; +MODULE_DEVICE_TABLE(of, qcom_llcc_of_match); static struct platform_driver qcom_llcc_driver = { .driver = { diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c index a9709aae54ab..fb76c8bc3c64 100644 --- a/drivers/soc/qcom/smp2p.c +++ b/drivers/soc/qcom/smp2p.c @@ -420,6 +420,7 @@ static int smp2p_parse_ipc(struct qcom_smp2p *smp2p) } smp2p->ipc_regmap = syscon_node_to_regmap(syscon); + of_node_put(syscon); if (IS_ERR(smp2p->ipc_regmap)) return PTR_ERR(smp2p->ipc_regmap); diff --git a/drivers/soc/qcom/smsm.c b/drivers/soc/qcom/smsm.c index c428d0f78816..6564f15c5319 100644 --- a/drivers/soc/qcom/smsm.c +++ b/drivers/soc/qcom/smsm.c @@ -359,6 +359,7 @@ static int smsm_parse_ipc(struct qcom_smsm *smsm, unsigned host_id) return 0; host->ipc_regmap = syscon_node_to_regmap(syscon); + of_node_put(syscon); if (IS_ERR(host->ipc_regmap)) return PTR_ERR(host->ipc_regmap); diff --git a/drivers/soc/rockchip/grf.c b/drivers/soc/rockchip/grf.c index 494cf2b5bf7b..343ff61ccccb 100644 --- a/drivers/soc/rockchip/grf.c +++ b/drivers/soc/rockchip/grf.c @@ -148,12 +148,14 @@ static int __init rockchip_grf_init(void) return -ENODEV; if (!match || !match->data) { pr_err("%s: missing grf data\n", __func__); + of_node_put(np); return -EINVAL; } grf_info = match->data; grf = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(grf)) { pr_err("%s: could not get grf syscon\n", __func__); return PTR_ERR(grf); diff --git a/drivers/soc/ti/ti_sci_pm_domains.c b/drivers/soc/ti/ti_sci_pm_domains.c index 8afb3f45d263..a33ec7eaf23d 100644 --- a/drivers/soc/ti/ti_sci_pm_domains.c +++ b/drivers/soc/ti/ti_sci_pm_domains.c @@ -183,6 +183,8 @@ static int ti_sci_pm_domain_probe(struct platform_device *pdev) devm_kcalloc(dev, max_id + 1, sizeof(*pd_provider->data.domains), GFP_KERNEL); + if (!pd_provider->data.domains) + return -ENOMEM; pd_provider->data.num_domains = max_id + 1; pd_provider->data.xlate = ti_sci_pd_xlate; diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index 7f629544060d..a027cfd49df8 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -28,6 +28,7 @@ #define AMD_SPI_RX_COUNT_REG 0x4B #define AMD_SPI_STATUS_REG 0x4C +#define AMD_SPI_FIFO_SIZE 70 #define AMD_SPI_MEM_SIZE 200 /* M_CMD OP codes for SPI */ @@ -245,6 +246,11 @@ static int amd_spi_master_transfer(struct spi_master *master, return 0; } +static size_t amd_spi_max_transfer_size(struct spi_device *spi) +{ + return AMD_SPI_FIFO_SIZE; +} + static int amd_spi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -278,6 +284,8 @@ static int amd_spi_probe(struct platform_device *pdev) master->flags = SPI_MASTER_HALF_DUPLEX; master->setup = amd_spi_master_setup; master->transfer_one_message = amd_spi_master_transfer; + master->max_transfer_size = amd_spi_max_transfer_size; + master->max_message_size = amd_spi_max_transfer_size; /* Register the controller with SPI framework */ err = devm_spi_register_master(dev, master); diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 33c32e931767..bb9d8386ba3b 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -1174,10 +1174,14 @@ static void bcm2835_spi_handle_err(struct spi_controller *ctlr, struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); /* if an error occurred and we have an active dma, then terminate */ - dmaengine_terminate_sync(ctlr->dma_tx); - bs->tx_dma_active = false; - dmaengine_terminate_sync(ctlr->dma_rx); - bs->rx_dma_active = false; + if (ctlr->dma_tx) { + dmaengine_terminate_sync(ctlr->dma_tx); + bs->tx_dma_active = false; + } + if (ctlr->dma_rx) { + dmaengine_terminate_sync(ctlr->dma_rx); + bs->rx_dma_active = false; + } bcm2835_spi_undo_prologue(bs); /* and reset */ diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c index 9851551ebbe0..46ae46a944c5 100644 --- a/drivers/spi/spi-fsl-qspi.c +++ b/drivers/spi/spi-fsl-qspi.c @@ -876,6 +876,10 @@ static int fsl_qspi_probe(struct platform_device *pdev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "QuadSPI-memory"); + if (!res) { + ret = -EINVAL; + goto err_put_ctrl; + } q->memmap_phy = res->start; /* Since there are 4 cs, map size required is 4 times ahb_buf_size */ q->ahb_addr = devm_ioremap(dev, q->memmap_phy, diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index 5f05d519fbbd..71376b6df89d 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -731,7 +731,7 @@ static int img_spfi_resume(struct device *dev) int ret; ret = pm_runtime_get_sync(dev); - if (ret) { + if (ret < 0) { pm_runtime_put_noidle(dev); return ret; } diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index e39fd38f5180..ea03cc589e61 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -1107,14 +1107,11 @@ static struct dma_chan *rspi_request_dma_chan(struct device *dev, } memset(&cfg, 0, sizeof(cfg)); + cfg.dst_addr = port_addr + RSPI_SPDR; + cfg.src_addr = port_addr + RSPI_SPDR; + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; + cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; cfg.direction = dir; - if (dir == DMA_MEM_TO_DEV) { - cfg.dst_addr = port_addr; - cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; - } else { - cfg.src_addr = port_addr; - cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; - } ret = dmaengine_slave_config(chan, &cfg); if (ret) { @@ -1145,12 +1142,12 @@ static int rspi_request_dma(struct device *dev, struct spi_controller *ctlr, } ctlr->dma_tx = rspi_request_dma_chan(dev, DMA_MEM_TO_DEV, dma_tx_id, - res->start + RSPI_SPDR); + res->start); if (!ctlr->dma_tx) return -ENODEV; ctlr->dma_rx = rspi_request_dma_chan(dev, DMA_DEV_TO_MEM, dma_rx_id, - res->start + RSPI_SPDR); + res->start); if (!ctlr->dma_rx) { dma_release_channel(ctlr->dma_tx); ctlr->dma_tx = NULL; diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index 4f24f6392212..9c58dcd7b324 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -295,7 +295,8 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi, if (!op->data.nbytes) goto wait_nobusy; - if (readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF) + if ((readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF) || + qspi->fmode == CCR_FMODE_APM) goto out; reinit_completion(&qspi->data_completion); diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index e06aafe169e0..081da1fd3fd7 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -448,6 +448,7 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst, enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; struct dma_async_tx_descriptor *tx; int ret; + unsigned long time_left; tx = dmaengine_prep_dma_memcpy(chan, dma_dst, dma_src, len, flags); if (!tx) { @@ -467,9 +468,9 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst, } dma_async_issue_pending(chan); - ret = wait_for_completion_timeout(&qspi->transfer_complete, + time_left = wait_for_completion_timeout(&qspi->transfer_complete, msecs_to_jiffies(len)); - if (ret <= 0) { + if (time_left == 0) { dmaengine_terminate_sync(chan); dev_err(qspi->dev, "DMA wait_for_completion_timeout\n"); return -ETIMEDOUT; diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index 7769eadfaf61..ccc65cfc519f 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -685,7 +685,7 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) if (!devpriv->usb_rx_buf) return -ENOMEM; - size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE); + size = max(usb_endpoint_maxp(devpriv->ep_tx), MIN_BUF_SIZE); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_tx_buf) return -ENOMEM; diff --git a/drivers/staging/fieldbus/anybuss/host.c b/drivers/staging/fieldbus/anybuss/host.c index 549cb7d51af8..2a20a1767d77 100644 --- a/drivers/staging/fieldbus/anybuss/host.c +++ b/drivers/staging/fieldbus/anybuss/host.c @@ -1384,7 +1384,7 @@ anybuss_host_common_probe(struct device *dev, goto err_device; return cd; err_device: - device_unregister(&cd->client->dev); + put_device(&cd->client->dev); err_kthread: kthread_stop(cd->qthread); err_reset: diff --git a/drivers/staging/greybus/audio_codec.c b/drivers/staging/greybus/audio_codec.c index 42ce6c88ea75..4ed29f852c23 100644 --- a/drivers/staging/greybus/audio_codec.c +++ b/drivers/staging/greybus/audio_codec.c @@ -621,8 +621,8 @@ static int gbcodec_mute_stream(struct snd_soc_dai *dai, int mute, int stream) break; } if (!data) { - dev_err(dai->dev, "%s:%s DATA connection missing\n", - dai->name, module->name); + dev_err(dai->dev, "%s DATA connection missing\n", + dai->name); mutex_unlock(&codec->lock); return -ENODEV; } diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c index 5c2ca61add8e..b65f2f3ef357 100644 --- a/drivers/staging/media/hantro/hantro_v4l2.c +++ b/drivers/staging/media/hantro/hantro_v4l2.c @@ -644,8 +644,12 @@ static int hantro_buf_prepare(struct vb2_buffer *vb) * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets * it to buffer length). */ - if (V4L2_TYPE_IS_CAPTURE(vq->type)) - vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage); + if (V4L2_TYPE_IS_CAPTURE(vq->type)) { + if (ctx->is_encoder) + vb2_set_plane_payload(vb, 0, 0); + else + vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage); + } return 0; } diff --git a/drivers/staging/media/rkvdec/rkvdec-h264.c b/drivers/staging/media/rkvdec/rkvdec-h264.c index 5487f6d0bcb6..7013f7ce3678 100644 --- a/drivers/staging/media/rkvdec/rkvdec-h264.c +++ b/drivers/staging/media/rkvdec/rkvdec-h264.c @@ -112,6 +112,7 @@ struct rkvdec_h264_run { const struct v4l2_ctrl_h264_sps *sps; const struct v4l2_ctrl_h264_pps *pps; const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix; + int ref_buf_idx[V4L2_H264_NUM_DPB_ENTRIES]; }; struct rkvdec_h264_ctx { @@ -661,8 +662,8 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, WRITE_PPS(0xff, PROFILE_IDC); WRITE_PPS(1, CONSTRAINT_SET3_FLAG); WRITE_PPS(sps->chroma_format_idc, CHROMA_FORMAT_IDC); - WRITE_PPS(sps->bit_depth_luma_minus8 + 8, BIT_DEPTH_LUMA); - WRITE_PPS(sps->bit_depth_chroma_minus8 + 8, BIT_DEPTH_CHROMA); + WRITE_PPS(sps->bit_depth_luma_minus8, BIT_DEPTH_LUMA); + WRITE_PPS(sps->bit_depth_chroma_minus8, BIT_DEPTH_CHROMA); WRITE_PPS(0, QPPRIME_Y_ZERO_TRANSFORM_BYPASS_FLAG); WRITE_PPS(sps->log2_max_frame_num_minus4, LOG2_MAX_FRAME_NUM_MINUS4); WRITE_PPS(sps->max_num_ref_frames, MAX_NUM_REF_FRAMES); @@ -725,6 +726,26 @@ static void assemble_hw_pps(struct rkvdec_ctx *ctx, } } +static void lookup_ref_buf_idx(struct rkvdec_ctx *ctx, + struct rkvdec_h264_run *run) +{ + const struct v4l2_ctrl_h264_decode_params *dec_params = run->decode_params; + u32 i; + + for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { + struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx; + const struct v4l2_h264_dpb_entry *dpb = run->decode_params->dpb; + struct vb2_queue *cap_q = &m2m_ctx->cap_q_ctx.q; + int buf_idx = -1; + + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) + buf_idx = vb2_find_timestamp(cap_q, + dpb[i].reference_ts, 0); + + run->ref_buf_idx[i] = buf_idx; + } +} + static void assemble_hw_rps(struct rkvdec_ctx *ctx, struct rkvdec_h264_run *run) { @@ -762,7 +783,7 @@ static void assemble_hw_rps(struct rkvdec_ctx *ctx, for (j = 0; j < RKVDEC_NUM_REFLIST; j++) { for (i = 0; i < h264_ctx->reflists.num_valid; i++) { - u8 dpb_valid = 0; + bool dpb_valid = run->ref_buf_idx[i] >= 0; u8 idx = 0; switch (j) { @@ -779,8 +800,6 @@ static void assemble_hw_rps(struct rkvdec_ctx *ctx, if (idx >= ARRAY_SIZE(dec_params->dpb)) continue; - dpb_valid = !!(dpb[idx].flags & - V4L2_H264_DPB_ENTRY_FLAG_ACTIVE); set_ps_field(hw_rps, DPB_INFO(i, j), idx | dpb_valid << 4); @@ -859,13 +878,8 @@ get_ref_buf(struct rkvdec_ctx *ctx, struct rkvdec_h264_run *run, unsigned int dpb_idx) { struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx; - const struct v4l2_h264_dpb_entry *dpb = run->decode_params->dpb; struct vb2_queue *cap_q = &m2m_ctx->cap_q_ctx.q; - int buf_idx = -1; - - if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) - buf_idx = vb2_find_timestamp(cap_q, - dpb[dpb_idx].reference_ts, 0); + int buf_idx = run->ref_buf_idx[dpb_idx]; /* * If a DPB entry is unused or invalid, address of current destination @@ -1102,6 +1116,7 @@ static int rkvdec_h264_run(struct rkvdec_ctx *ctx) assemble_hw_scaling_list(ctx, &run); assemble_hw_pps(ctx, &run); + lookup_ref_buf_idx(ctx, &run); assemble_hw_rps(ctx, &run); config_registers(ctx, &run); diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index a7788e7a9542..e384ea8d7280 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -1000,7 +1000,6 @@ static const char * const rkvdec_clk_names[] = { static int rkvdec_probe(struct platform_device *pdev) { struct rkvdec_dev *rkvdec; - struct resource *res; unsigned int i; int ret, irq; @@ -1032,8 +1031,7 @@ static int rkvdec_probe(struct platform_device *pdev) */ clk_set_rate(rkvdec->clocks[0].clk, 500 * 1000 * 1000); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rkvdec->regs = devm_ioremap_resource(&pdev->dev, res); + rkvdec->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(rkvdec->regs)) return PTR_ERR(rkvdec->regs); diff --git a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c index 09b0b8a16e99..2e971cbe2d7a 100644 --- a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c +++ b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c @@ -267,6 +267,8 @@ static int rt2880_pinmux_pins(struct rt2880_priv *p) p->func[i]->pin_count, sizeof(int), GFP_KERNEL); + if (!p->func[i]->pins) + return -ENOMEM; for (j = 0; j < p->func[i]->pin_count; j++) p->func[i]->pins[j] = p->func[i]->pin_first + j; diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index e8e72f79ca00..aeb6f015fdda 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -651,9 +651,9 @@ static void rtllib_beacons_stop(struct rtllib_device *ieee) spin_lock_irqsave(&ieee->beacon_lock, flags); ieee->beacon_txing = 0; - del_timer_sync(&ieee->beacon_timer); spin_unlock_irqrestore(&ieee->beacon_lock, flags); + del_timer_sync(&ieee->beacon_timer); } diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c index 690b664df8fa..56a447651644 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c +++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c @@ -528,9 +528,9 @@ static void ieee80211_beacons_stop(struct ieee80211_device *ieee) spin_lock_irqsave(&ieee->beacon_lock, flags); ieee->beacon_txing = 0; - del_timer_sync(&ieee->beacon_timer); spin_unlock_irqrestore(&ieee->beacon_lock, flags); + del_timer_sync(&ieee->beacon_timer); } void ieee80211_stop_send_beacons(struct ieee80211_device *ieee) diff --git a/drivers/staging/rtl8712/os_intfs.c b/drivers/staging/rtl8712/os_intfs.c index 2214aca09730..daa3180dfde3 100644 --- a/drivers/staging/rtl8712/os_intfs.c +++ b/drivers/staging/rtl8712/os_intfs.c @@ -332,7 +332,6 @@ void r8712_free_drv_sw(struct _adapter *padapter) r8712_free_evt_priv(&padapter->evtpriv); r8712_DeInitSwLeds(padapter); r8712_free_mlme_priv(&padapter->mlmepriv); - r8712_free_io_queue(padapter); _free_xmit_priv(&padapter->xmitpriv); _r8712_free_sta_priv(&padapter->stapriv); _r8712_free_recv_priv(&padapter->recvpriv); diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index fed96d4251bf..68d66c3ce2c8 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -266,6 +266,7 @@ static uint r8712_usb_dvobj_init(struct _adapter *padapter) static void r8712_usb_dvobj_deinit(struct _adapter *padapter) { + r8712_free_io_queue(padapter); } void rtl871x_intf_stop(struct _adapter *padapter) @@ -303,9 +304,6 @@ void r871x_dev_unload(struct _adapter *padapter) rtl8712_hal_deinit(padapter); } - /*s6.*/ - if (padapter->dvobj_deinit) - padapter->dvobj_deinit(padapter); padapter->bup = false; } } @@ -541,13 +539,13 @@ static int r871xu_drv_init(struct usb_interface *pusb_intf, } else { AutoloadFail = false; } - if (((mac[0] == 0xff) && (mac[1] == 0xff) && + if ((!AutoloadFail) || + ((mac[0] == 0xff) && (mac[1] == 0xff) && (mac[2] == 0xff) && (mac[3] == 0xff) && (mac[4] == 0xff) && (mac[5] == 0xff)) || ((mac[0] == 0x00) && (mac[1] == 0x00) && (mac[2] == 0x00) && (mac[3] == 0x00) && - (mac[4] == 0x00) && (mac[5] == 0x00)) || - (!AutoloadFail)) { + (mac[4] == 0x00) && (mac[5] == 0x00))) { mac[0] = 0x00; mac[1] = 0xe0; mac[2] = 0x4c; @@ -610,6 +608,8 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf) /* Stop driver mlme relation timer */ r8712_stop_drv_timers(padapter); r871x_dev_unload(padapter); + if (padapter->dvobj_deinit) + padapter->dvobj_deinit(padapter); r8712_free_drv_sw(padapter); free_netdev(pnetdev); diff --git a/drivers/staging/rtl8712/usb_ops.c b/drivers/staging/rtl8712/usb_ops.c index e64845e6adf3..af9966d03979 100644 --- a/drivers/staging/rtl8712/usb_ops.c +++ b/drivers/staging/rtl8712/usb_ops.c @@ -29,7 +29,8 @@ static u8 usb_read8(struct intf_hdl *intfhdl, u32 addr) u16 wvalue; u16 index; u16 len; - __le32 data; + int status; + __le32 data = 0; struct intf_priv *intfpriv = intfhdl->pintfpriv; request = 0x05; @@ -37,8 +38,10 @@ static u8 usb_read8(struct intf_hdl *intfhdl, u32 addr) index = 0; wvalue = (u16)(addr & 0x0000ffff); len = 1; - r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, &data, len, - requesttype); + status = r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, + &data, len, requesttype); + if (status < 0) + return 0; return (u8)(le32_to_cpu(data) & 0x0ff); } @@ -49,7 +52,8 @@ static u16 usb_read16(struct intf_hdl *intfhdl, u32 addr) u16 wvalue; u16 index; u16 len; - __le32 data; + int status; + __le32 data = 0; struct intf_priv *intfpriv = intfhdl->pintfpriv; request = 0x05; @@ -57,8 +61,10 @@ static u16 usb_read16(struct intf_hdl *intfhdl, u32 addr) index = 0; wvalue = (u16)(addr & 0x0000ffff); len = 2; - r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, &data, len, - requesttype); + status = r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, + &data, len, requesttype); + if (status < 0) + return 0; return (u16)(le32_to_cpu(data) & 0xffff); } @@ -69,7 +75,8 @@ static u32 usb_read32(struct intf_hdl *intfhdl, u32 addr) u16 wvalue; u16 index; u16 len; - __le32 data; + int status; + __le32 data = 0; struct intf_priv *intfpriv = intfhdl->pintfpriv; request = 0x05; @@ -77,8 +84,10 @@ static u32 usb_read32(struct intf_hdl *intfhdl, u32 addr) index = 0; wvalue = (u16)(addr & 0x0000ffff); len = 4; - r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, &data, len, - requesttype); + status = r8712_usbctrl_vendorreq(intfpriv, request, wvalue, index, + &data, len, requesttype); + if (status < 0) + return 0; return le32_to_cpu(data); } diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index 902ac8169948..083ff72976cf 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -1351,9 +1351,11 @@ static int rtw_wx_set_scan(struct net_device *dev, struct iw_request_info *a, sec_len = *(pos++); len -= 1; - if (sec_len > 0 && sec_len <= len) { + if (sec_len > 0 && + sec_len <= len && + sec_len <= 32) { ssid[ssid_index].SsidLength = sec_len; - memcpy(ssid[ssid_index].Ssid, pos, ssid[ssid_index].SsidLength); + memcpy(ssid[ssid_index].Ssid, pos, sec_len); /* DBG_871X("%s COMBO_SCAN with specific ssid:%s, %d\n", __func__ */ /* , ssid[ssid_index].Ssid, ssid[ssid_index].SsidLength); */ ssid_index++; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 109f019d2148..1eded5c4ebda 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -831,7 +831,6 @@ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, attrib->unmap_granularity = q->limits.discard_granularity / block_size; attrib->unmap_granularity_alignment = q->limits.discard_alignment / block_size; - attrib->unmap_zeroes_data = !!(q->limits.max_write_zeroes_sectors); return true; } EXPORT_SYMBOL(target_configure_unmap_from_queue); diff --git a/drivers/thermal/broadcom/bcm2711_thermal.c b/drivers/thermal/broadcom/bcm2711_thermal.c index 67c2a737bc9d..7b536c8a59dc 100644 --- a/drivers/thermal/broadcom/bcm2711_thermal.c +++ b/drivers/thermal/broadcom/bcm2711_thermal.c @@ -38,7 +38,6 @@ static int bcm2711_get_temp(void *data, int *temp) int offset = thermal_zone_get_offset(priv->thermal); u32 val; int ret; - long t; ret = regmap_read(priv->regmap, AVS_RO_TEMP_STATUS, &val); if (ret) @@ -50,9 +49,7 @@ static int bcm2711_get_temp(void *data, int *temp) val &= AVS_RO_TEMP_STATUS_DATA_MSK; /* Convert a HW code to a temperature reading (millidegree celsius) */ - t = slope * val + offset; - - *temp = t < 0 ? 0 : t; + *temp = slope * val + offset; return 0; } diff --git a/drivers/thermal/broadcom/sr-thermal.c b/drivers/thermal/broadcom/sr-thermal.c index 475ce2900771..85ab9edd580c 100644 --- a/drivers/thermal/broadcom/sr-thermal.c +++ b/drivers/thermal/broadcom/sr-thermal.c @@ -60,6 +60,9 @@ static int sr_thermal_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOENT; + sr_thermal->regs = (void __iomem *)devm_memremap(&pdev->dev, res->start, resource_size(res), MEMREMAP_WB); diff --git a/drivers/thermal/imx_sc_thermal.c b/drivers/thermal/imx_sc_thermal.c index 8d76dbfde6a9..331a241eb0ef 100644 --- a/drivers/thermal/imx_sc_thermal.c +++ b/drivers/thermal/imx_sc_thermal.c @@ -94,8 +94,8 @@ static int imx_sc_thermal_probe(struct platform_device *pdev) sensor = devm_kzalloc(&pdev->dev, sizeof(*sensor), GFP_KERNEL); if (!sensor) { of_node_put(child); - of_node_put(sensor_np); - return -ENOMEM; + ret = -ENOMEM; + goto put_node; } ret = thermal_zone_of_get_sensor_id(child, @@ -124,7 +124,9 @@ static int imx_sc_thermal_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "failed to add hwmon sysfs attributes\n"); } +put_node: of_node_put(sensor_np); + of_node_put(np); return ret; } diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c index 097266342e5e..4bcaab250676 100644 --- a/drivers/tty/cyclades.c +++ b/drivers/tty/cyclades.c @@ -556,7 +556,7 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip, } info->idle_stats.recv_idle = jiffies; } - tty_schedule_flip(port); + tty_flip_buffer_push(port); /* end of service */ cyy_writeb(info, CyRIR, save_xir & 0x3f); @@ -996,7 +996,7 @@ static void cyz_handle_rx(struct cyclades_port *info) mod_timer(&info->rx_full_timer, jiffies + 1); #endif info->idle_stats.recv_idle = jiffies; - tty_schedule_flip(&info->port); + tty_flip_buffer_push(&info->port); /* Update rx_get */ cy_writel(&buf_ctrl->rx_get, new_rx_get); @@ -1172,7 +1172,7 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) if (delta_count) wake_up_interruptible(&info->port.delta_msr_wait); if (special_count) - tty_schedule_flip(&info->port); + tty_flip_buffer_push(&info->port); } } diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c index c8c5cdfc5e19..d6e82eb61fc2 100644 --- a/drivers/tty/goldfish.c +++ b/drivers/tty/goldfish.c @@ -151,7 +151,7 @@ static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id) address = (unsigned long)(void *)buf; goldfish_tty_rw(qtty, address, count, 0); - tty_schedule_flip(&qtty->port); + tty_flip_buffer_push(&qtty->port); return IRQ_HANDLED; } @@ -407,6 +407,7 @@ static int goldfish_tty_probe(struct platform_device *pdev) err_tty_register_device_failed: free_irq(irq, qtty); err_dec_line_count: + tty_port_destroy(&qtty->port); goldfish_tty_current_line_count--; if (goldfish_tty_current_line_count == 0) goldfish_tty_delete_driver(); @@ -427,7 +428,8 @@ static int goldfish_tty_remove(struct platform_device *pdev) tty_unregister_device(goldfish_tty_driver, qtty->console.index); iounmap(qtty->base); qtty->base = NULL; - free_irq(qtty->irq, pdev); + free_irq(qtty->irq, qtty); + tty_port_destroy(&qtty->port); goldfish_tty_current_line_count--; if (goldfish_tty_current_line_count == 0) goldfish_tty_delete_driver(); diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index f9f14104bd2c..d6528f3bb9b9 100644 --- a/drivers/tty/moxa.c +++ b/drivers/tty/moxa.c @@ -1385,7 +1385,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle, if (inited && !tty_throttled(tty) && MoxaPortRxQueue(p) > 0) { /* RX */ MoxaPortReadData(p); - tty_schedule_flip(&p->port); + tty_flip_buffer_push(&p->port); } } else { clear_bit(EMPTYWAIT, &p->statusflags); @@ -1410,7 +1410,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle, if (tty && (intr & IntrBreak) && !I_IGNBRK(tty)) { /* BREAK */ tty_insert_flip_char(&p->port, 0, TTY_BREAK); - tty_schedule_flip(&p->port); + tty_flip_buffer_push(&p->port); } if (intr & IntrLine) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 58190135efb7..12dde01e576b 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2073,6 +2073,35 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty, return ldata->read_tail != canon_head; } +/* + * If we finished a read at the exact location of an + * EOF (special EOL character that's a __DISABLED_CHAR) + * in the stream, silently eat the EOF. + */ +static void canon_skip_eof(struct tty_struct *tty) +{ + struct n_tty_data *ldata = tty->disc_data; + size_t tail, canon_head; + + canon_head = smp_load_acquire(&ldata->canon_head); + tail = ldata->read_tail; + + // No data? + if (tail == canon_head) + return; + + // See if the tail position is EOF in the circular buffer + tail &= (N_TTY_BUF_SIZE - 1); + if (!test_bit(tail, ldata->read_flags)) + return; + if (read_buf(ldata, tail) != __DISABLED_CHAR) + return; + + // Clear the EOL bit, skip the EOF char. + clear_bit(tail, ldata->read_flags); + smp_store_release(&ldata->read_tail, ldata->read_tail + 1); +} + /** * job_control - check job control * @tty: tty @@ -2142,7 +2171,14 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, */ if (*cookie) { if (ldata->icanon && !L_EXTPROC(tty)) { - if (canon_copy_from_read_buf(tty, &kb, &nr)) + /* + * If we have filled the user buffer, see + * if we should skip an EOF character before + * releasing the lock and returning done. + */ + if (!nr) + canon_skip_eof(tty); + else if (canon_copy_from_read_buf(tty, &kb, &nr)) return kb - kbuf; } else { if (copy_from_read_buf(tty, &kb, &nr)) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 23368cec7ee8..16498f5fba64 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -111,21 +111,11 @@ static void pty_unthrottle(struct tty_struct *tty) static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c) { struct tty_struct *to = tty->link; - unsigned long flags; - if (tty->stopped) + if (tty->stopped || !c) return 0; - if (c > 0) { - spin_lock_irqsave(&to->port->lock, flags); - /* Stuff the data into the input queue of the other end */ - c = tty_insert_flip_string(to->port, buf, c); - spin_unlock_irqrestore(&to->port->lock, flags); - /* And shovel */ - if (c) - tty_flip_buffer_push(to->port); - } - return c; + return tty_insert_flip_string_and_push_buffer(to->port, buf, c); } /** diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index cae61d1ebec5..98ce484f1089 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -571,6 +572,9 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) up->port.dev = dev; + if (uart_console_enabled(&up->port)) + pm_runtime_get_sync(up->port.dev); + serial8250_apply_quirks(up); uart_add_one_port(drv, &up->port); } diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index 251f0018ae8c..dba5950b8d0e 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -200,12 +200,12 @@ static int fintek_8250_rs485_config(struct uart_port *port, if (!pdata) return -EINVAL; - /* Hardware do not support same RTS level on send and receive */ - if (!(rs485->flags & SER_RS485_RTS_ON_SEND) == - !(rs485->flags & SER_RS485_RTS_AFTER_SEND)) - return -EINVAL; if (rs485->flags & SER_RS485_ENABLED) { + /* Hardware do not support same RTS level on send and receive */ + if (!(rs485->flags & SER_RS485_RTS_ON_SEND) == + !(rs485->flags & SER_RS485_RTS_AFTER_SEND)) + return -EINVAL; memset(rs485->padding, 0, sizeof(rs485->padding)); config |= RS485_URA; } else { diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index e0fa24f0f732..43884e8b5161 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1532,6 +1532,8 @@ static inline void __stop_tx(struct uart_8250_port *p) if (em485) { unsigned char lsr = serial_in(p, UART_LSR); + p->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + /* * To provide required timeing and allow FIFO transfer, * __stop_tx_rs485() must be called only when both FIFO and @@ -2951,8 +2953,10 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: - if (!port->mapbase) + if (!port->mapbase) { + ret = -EINVAL; break; + } if (!request_mem_region(port->mapbase, size, "serial")) { ret = -EBUSY; diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 07b19e97f850..9900ee3f9068 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1326,6 +1326,15 @@ static void pl011_stop_rx(struct uart_port *port) pl011_dma_rx_stop(uap); } +static void pl011_throttle_rx(struct uart_port *port) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + pl011_stop_rx(port); + spin_unlock_irqrestore(&port->lock, flags); +} + static void pl011_enable_ms(struct uart_port *port) { struct uart_amba_port *uap = @@ -1717,9 +1726,10 @@ static int pl011_allocate_irq(struct uart_amba_port *uap) */ static void pl011_enable_interrupts(struct uart_amba_port *uap) { + unsigned long flags; unsigned int i; - spin_lock_irq(&uap->port.lock); + spin_lock_irqsave(&uap->port.lock, flags); /* Clear out any spuriously appearing RX interrupts */ pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR); @@ -1741,7 +1751,14 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap) if (!pl011_dma_rx_running(uap)) uap->im |= UART011_RXIM; pl011_write(uap->im, uap, REG_IMSC); - spin_unlock_irq(&uap->port.lock); + spin_unlock_irqrestore(&uap->port.lock, flags); +} + +static void pl011_unthrottle_rx(struct uart_port *port) +{ + struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + + pl011_enable_interrupts(uap); } static int pl011_startup(struct uart_port *port) @@ -2116,6 +2133,8 @@ static const struct uart_ops amba_pl011_pops = { .stop_tx = pl011_stop_tx, .start_tx = pl011_start_tx, .stop_rx = pl011_stop_rx, + .throttle = pl011_throttle_rx, + .unthrottle = pl011_unthrottle_rx, .enable_ms = pl011_enable_ms, .break_ctl = pl011_break_ctl, .startup = pl011_startup, diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c index c7f81aa1ce91..5fea9bf86e85 100644 --- a/drivers/tty/serial/digicolor-usart.c +++ b/drivers/tty/serial/digicolor-usart.c @@ -309,6 +309,8 @@ static void digicolor_uart_set_termios(struct uart_port *port, case CS8: default: config |= UA_CONFIG_CHAR_LEN; + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; break; } diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index b9f8add284e3..52a603a6f9b8 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -229,8 +229,6 @@ /* IMX lpuart has four extra unused regs located at the beginning */ #define IMX_REG_OFF 0x10 -static DEFINE_IDA(fsl_lpuart_ida); - enum lpuart_type { VF610_LPUART, LS1021A_LPUART, @@ -265,7 +263,6 @@ struct lpuart_port { int rx_dma_rng_buf_len; unsigned int dma_tx_nents; wait_queue_head_t dma_wait; - bool id_allocated; }; struct lpuart_soc_data { @@ -2638,23 +2635,18 @@ static int lpuart_probe(struct platform_device *pdev) ret = of_alias_get_id(np, "serial"); if (ret < 0) { - ret = ida_simple_get(&fsl_lpuart_ida, 0, UART_NR, GFP_KERNEL); - if (ret < 0) { - dev_err(&pdev->dev, "port line is full, add device failed\n"); - return ret; - } - sport->id_allocated = true; + dev_err(&pdev->dev, "failed to get alias id, errno %d\n", ret); + return ret; } if (ret >= ARRAY_SIZE(lpuart_ports)) { dev_err(&pdev->dev, "serial%d out of range\n", ret); - ret = -EINVAL; - goto failed_out_of_range; + return -EINVAL; } sport->port.line = ret; ret = lpuart_enable_clks(sport); if (ret) - goto failed_clock_enable; + return ret; sport->port.uartclk = lpuart_get_baud_clk_rate(sport); lpuart_ports[sport->port.line] = sport; @@ -2697,10 +2689,6 @@ static int lpuart_probe(struct platform_device *pdev) failed_attach_port: failed_irq_request: lpuart_disable_clks(sport); -failed_clock_enable: -failed_out_of_range: - if (sport->id_allocated) - ida_simple_remove(&fsl_lpuart_ida, sport->port.line); return ret; } @@ -2710,9 +2698,6 @@ static int lpuart_remove(struct platform_device *pdev) uart_remove_one_port(&lpuart_reg, &sport->port); - if (sport->id_allocated) - ida_simple_remove(&fsl_lpuart_ida, sport->port.line); - lpuart_disable_clks(sport); if (sport->dma_tx_chan) @@ -2842,7 +2827,6 @@ static int __init lpuart_serial_init(void) static void __exit lpuart_serial_exit(void) { - ida_destroy(&fsl_lpuart_ida); platform_driver_unregister(&lpuart_driver); uart_unregister_driver(&lpuart_reg); } diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c index 94c8281ddb5f..74b325c344da 100644 --- a/drivers/tty/serial/icom.c +++ b/drivers/tty/serial/icom.c @@ -1503,7 +1503,7 @@ static int icom_probe(struct pci_dev *dev, retval = pci_read_config_dword(dev, PCI_COMMAND, &command_reg); if (retval) { dev_err(&dev->dev, "PCI Config read FAILED\n"); - return retval; + goto probe_exit0; } pci_write_config_dword(dev, PCI_COMMAND, diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c index b5898c932036..a9802308ff60 100644 --- a/drivers/tty/serial/lpc32xx_hs.c +++ b/drivers/tty/serial/lpc32xx_hs.c @@ -344,7 +344,7 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id) LPC32XX_HSUART_IIR(port->membase)); port->icount.overrun++; tty_insert_flip_char(tport, 0, TTY_OVERRUN); - tty_schedule_flip(tport); + tty_flip_buffer_push(tport); } /* Data received? */ diff --git a/drivers/tty/serial/meson_uart.c b/drivers/tty/serial/meson_uart.c index 69eeef9edfa5..3f8ccb87c604 100644 --- a/drivers/tty/serial/meson_uart.c +++ b/drivers/tty/serial/meson_uart.c @@ -255,6 +255,14 @@ static const char *meson_uart_type(struct uart_port *port) return (port->type == PORT_MESON) ? "meson_uart" : NULL; } +/* + * This function is called only from probe() using a temporary io mapping + * in order to perform a reset before setting up the device. Since the + * temporarily mapped region was successfully requested, there can be no + * console on this port at this time. Hence it is not necessary for this + * function to acquire the port->lock. (Since there is no console on this + * port at this time, the port->lock is not initialized yet.) + */ static void meson_uart_reset(struct uart_port *port) { u32 val; @@ -269,9 +277,12 @@ static void meson_uart_reset(struct uart_port *port) static int meson_uart_startup(struct uart_port *port) { + unsigned long flags; u32 val; int ret = 0; + spin_lock_irqsave(&port->lock, flags); + val = readl(port->membase + AML_UART_CONTROL); val |= AML_UART_CLEAR_ERR; writel(val, port->membase + AML_UART_CONTROL); @@ -287,6 +298,8 @@ static int meson_uart_startup(struct uart_port *port) val = (AML_UART_RECV_IRQ(1) | AML_UART_XMIT_IRQ(port->fifosize / 2)); writel(val, port->membase + AML_UART_MISC); + spin_unlock_irqrestore(&port->lock, flags); + ret = request_irq(port->irq, meson_uart_interrupt, 0, port->name, port); diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 26bcbec5422e..27023a56f3ac 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -1593,6 +1593,7 @@ static inline struct uart_port *msm_get_port_from_line(unsigned int line) static void __msm_console_write(struct uart_port *port, const char *s, unsigned int count, bool is_uartdm) { + unsigned long flags; int i; int num_newlines = 0; bool replaced = false; @@ -1610,6 +1611,8 @@ static void __msm_console_write(struct uart_port *port, const char *s, num_newlines++; count += num_newlines; + local_irq_save(flags); + if (port->sysrq) locked = 0; else if (oops_in_progress) @@ -1655,6 +1658,8 @@ static void __msm_console_write(struct uart_port *port, const char *s, if (locked) spin_unlock(&port->lock); + + local_irq_restore(flags); } static void msm_console_write(struct console *co, const char *s, diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 34ff2181afd1..e941f57de953 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -443,13 +443,13 @@ static void mvebu_uart_shutdown(struct uart_port *port) } } -static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) +static unsigned int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) { unsigned int d_divisor, m_divisor; u32 brdv, osamp; if (!port->uartclk) - return -EOPNOTSUPP; + return 0; /* * The baudrate is derived from the UART clock thanks to two divisors: @@ -473,7 +473,7 @@ static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud) osamp &= ~OSAMP_DIVISORS_MASK; writel(osamp, port->membase + UART_OSAMP); - return 0; + return DIV_ROUND_CLOSEST(port->uartclk, d_divisor * m_divisor); } static void mvebu_uart_set_termios(struct uart_port *port, @@ -510,15 +510,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, max_baud = 230400; baud = uart_get_baud_rate(port, termios, old, min_baud, max_baud); - if (mvebu_uart_baud_rate_set(port, baud)) { - /* No clock available, baudrate cannot be changed */ - if (old) - baud = uart_get_baud_rate(port, old, NULL, - min_baud, max_baud); - } else { - tty_termios_encode_baud_rate(termios, baud, baud); - uart_update_timeout(port, termios->c_cflag, baud); - } + baud = mvebu_uart_baud_rate_set(port, baud); + + /* In case baudrate cannot be changed, report previous old value */ + if (baud == 0 && old) + baud = tty_termios_baud_rate(old); /* Only the following flag changes are supported */ if (old) { @@ -529,6 +525,11 @@ static void mvebu_uart_set_termios(struct uart_port *port, termios->c_cflag |= CS8; } + if (baud != 0) { + tty_termios_encode_baud_rate(termios, baud, baud); + uart_update_timeout(port, termios->c_cflag, baud); + } + spin_unlock_irqrestore(&port->lock, flags); } diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c index c149f8c30007..a0d4bffe70bd 100644 --- a/drivers/tty/serial/owl-uart.c +++ b/drivers/tty/serial/owl-uart.c @@ -695,6 +695,7 @@ static int owl_uart_probe(struct platform_device *pdev) owl_port->port.uartclk = clk_get_rate(owl_port->clk); if (owl_port->port.uartclk == 0) { dev_err(&pdev->dev, "clock rate is zero\n"); + clk_disable_unprepare(owl_port->clk); return -EINVAL; } owl_port->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP | UPF_LOW_LATENCY; diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index a7363bc66c11..351ad0b02029 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -628,22 +628,6 @@ static int push_rx(struct eg20t_port *priv, const unsigned char *buf, return 0; } -static int pop_tx_x(struct eg20t_port *priv, unsigned char *buf) -{ - int ret = 0; - struct uart_port *port = &priv->port; - - if (port->x_char) { - dev_dbg(priv->port.dev, "%s:X character send %02x (%lu)\n", - __func__, port->x_char, jiffies); - buf[0] = port->x_char; - port->x_char = 0; - ret = 1; - } - - return ret; -} - static int dma_push_rx(struct eg20t_port *priv, int size) { int room; @@ -893,9 +877,10 @@ static unsigned int handle_tx(struct eg20t_port *priv) fifo_size = max(priv->fifo_size, 1); tx_empty = 1; - if (pop_tx_x(priv, xmit->buf)) { - pch_uart_hal_write(priv, xmit->buf, 1); + if (port->x_char) { + pch_uart_hal_write(priv, &port->x_char, 1); port->icount.tx++; + port->x_char = 0; tx_empty = 0; fifo_size--; } @@ -950,9 +935,11 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv) } fifo_size = max(priv->fifo_size, 1); - if (pop_tx_x(priv, xmit->buf)) { - pch_uart_hal_write(priv, xmit->buf, 1); + + if (port->x_char) { + pch_uart_hal_write(priv, &port->x_char, 1); port->icount.tx++; + port->x_char = 0; fifo_size--; } diff --git a/drivers/tty/serial/rda-uart.c b/drivers/tty/serial/rda-uart.c index 85366e059258..a45069e7ebea 100644 --- a/drivers/tty/serial/rda-uart.c +++ b/drivers/tty/serial/rda-uart.c @@ -262,6 +262,8 @@ static void rda_uart_set_termios(struct uart_port *port, fallthrough; case CS7: ctrl &= ~RDA_UART_DBITS_8; + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS7; break; default: ctrl |= RDA_UART_DBITS_8; diff --git a/drivers/tty/serial/sa1100.c b/drivers/tty/serial/sa1100.c index f5fab1dd96bc..aa1cf2ae17a9 100644 --- a/drivers/tty/serial/sa1100.c +++ b/drivers/tty/serial/sa1100.c @@ -448,6 +448,8 @@ sa1100_set_termios(struct uart_port *port, struct ktermios *termios, baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16); quot = uart_get_divisor(port, baud); + del_timer_sync(&sport->timer); + spin_lock_irqsave(&sport->port.lock, flags); sport->port.read_status_mask &= UTSR0_TO_SM(UTSR0_TFS); @@ -478,8 +480,6 @@ sa1100_set_termios(struct uart_port *port, struct ktermios *termios, UTSR1_TO_SM(UTSR1_ROR); } - del_timer_sync(&sport->timer); - /* * Update the per-port timeout. */ diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index 81faead3c4f8..263c33260d8a 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -361,8 +361,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport) /* Enable tx dma mode */ ucon = rd_regl(port, S3C2410_UCON); ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK); - ucon |= (dma_get_cache_alignment() >= 16) ? - S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1; + ucon |= S3C64XX_UCON_TXBURST_1; ucon |= S3C64XX_UCON_TXMODE_DMA; wr_regl(port, S3C2410_UCON, ucon); @@ -634,7 +633,7 @@ static void enable_rx_dma(struct s3c24xx_uart_port *ourport) S3C64XX_UCON_DMASUS_EN | S3C64XX_UCON_TIMEOUT_EN | S3C64XX_UCON_RXMODE_MASK); - ucon |= S3C64XX_UCON_RXBURST_16 | + ucon |= S3C64XX_UCON_RXBURST_1 | 0xf << S3C64XX_UCON_TIMEOUT_SHIFT | S3C64XX_UCON_EMPTYINT_EN | S3C64XX_UCON_TIMEOUT_EN | diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 6ff86bebc8b7..37f39eb32494 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -144,6 +144,11 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) unsigned long flags; unsigned int old; + if (port->rs485.flags & SER_RS485_ENABLED) { + set &= ~TIOCM_RTS; + clear &= ~TIOCM_RTS; + } + spin_lock_irqsave(&port->lock, flags); old = port->mctrl; port->mctrl = (old & ~clear) | set; @@ -157,23 +162,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) static void uart_port_dtr_rts(struct uart_port *uport, int raise) { - int rs485_on = uport->rs485_config && - (uport->rs485.flags & SER_RS485_ENABLED); - int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND); - - if (raise) { - if (rs485_on && RTS_after_send) { - uart_set_mctrl(uport, TIOCM_DTR); - uart_clear_mctrl(uport, TIOCM_RTS); - } else { - uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS); - } - } else { - unsigned int clear = TIOCM_DTR; - - clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0; - uart_clear_mctrl(uport, clear); - } + if (raise) + uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS); + else + uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS); } /* @@ -1109,11 +1101,6 @@ uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) goto out; if (!tty_io_error(tty)) { - if (uport->rs485.flags & SER_RS485_ENABLED) { - set &= ~TIOCM_RTS; - clear &= ~TIOCM_RTS; - } - uart_update_mctrl(uport, set, clear); ret = 0; } @@ -1947,11 +1934,6 @@ static int uart_proc_show(struct seq_file *m, void *v) } #endif -static inline bool uart_console_enabled(struct uart_port *port) -{ - return uart_console(port) && (port->cons->flags & CON_ENABLED); -} - static void uart_port_spin_lock_init(struct uart_port *port) { spin_lock_init(&port->lock); @@ -2417,6 +2399,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, */ spin_lock_irqsave(&port->lock, flags); port->mctrl &= TIOCM_DTR; + if (port->rs485.flags & SER_RS485_ENABLED && + !(port->rs485.flags & SER_RS485_RTS_AFTER_SEND)) + port->mctrl |= TIOCM_RTS; port->ops->set_mctrl(port, port->mctrl); spin_unlock_irqrestore(&port->lock, flags); diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c index 7a07e7272de1..7beec331010c 100644 --- a/drivers/tty/serial/serial_txx9.c +++ b/drivers/tty/serial/serial_txx9.c @@ -644,6 +644,8 @@ serial_txx9_set_termios(struct uart_port *port, struct ktermios *termios, case CS6: /* not supported */ case CS8: cval |= TXX9_SILCR_UMODE_8BIT; + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; break; } diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index f700bfaef129..8d924727d6f0 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2392,8 +2392,12 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, int best_clk = -1; unsigned long flags; - if ((termios->c_cflag & CSIZE) == CS7) + if ((termios->c_cflag & CSIZE) == CS7) { smr_val |= SCSMR_CHR; + } else { + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; + } if (termios->c_cflag & PARENB) smr_val |= SCSMR_PE; if (termios->c_cflag & PARODD) diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 214bf3086c68..91952be01074 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -667,12 +667,16 @@ static void sifive_serial_set_termios(struct uart_port *port, int rate; char nstop; - if ((termios->c_cflag & CSIZE) != CS8) + if ((termios->c_cflag & CSIZE) != CS8) { dev_err_once(ssp->port.dev, "only 8-bit words supported\n"); + termios->c_cflag &= ~CSIZE; + termios->c_cflag |= CS8; + } if (termios->c_iflag & (INPCK | PARMRK)) dev_err_once(ssp->port.dev, "parity checking not supported\n"); if (termios->c_iflag & BRKINT) dev_err_once(ssp->port.dev, "BREAK detection not supported\n"); + termios->c_iflag &= ~(INPCK|PARMRK|BRKINT); /* Set number of stop bits */ nstop = (termios->c_cflag & CSTOPB) ? 2 : 1; @@ -999,7 +1003,7 @@ static int sifive_serial_probe(struct platform_device *pdev) /* Set up clock divider */ ssp->clkin_rate = clk_get_rate(ssp->clk); ssp->baud_rate = SIFIVE_DEFAULT_BAUD_RATE; - ssp->port.uartclk = ssp->baud_rate * 16; + ssp->port.uartclk = ssp->clkin_rate; __ssp_update_div(ssp); platform_set_drvdata(pdev, ssp); diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index e7048515a79c..97d36f870f64 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -535,10 +535,14 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios, /* set character length */ if ((cflag & CSIZE) == CS7) { ctrl_val |= ASC_CTL_MODE_7BIT_PAR; + cflag |= PARENB; } else { ctrl_val |= (cflag & PARENB) ? ASC_CTL_MODE_8BIT_PAR : ASC_CTL_MODE_8BIT; + cflag &= ~CSIZE; + cflag |= CS8; } + termios->c_cflag = cflag; /* set stop bit */ ctrl_val |= (cflag & CSTOPB) ? ASC_CTL_STOP_2BIT : ASC_CTL_STOP_1BIT; diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 6afae051ba8d..9377da1e97c0 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -70,6 +70,8 @@ static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE, *cr3 |= USART_CR3_DEM; over8 = *cr1 & USART_CR1_OVER8; + *cr1 &= ~(USART_CR1_DEDT_MASK | USART_CR1_DEAT_MASK); + if (over8) rs485_deat_dedt = delay_ADE * baud * 8; else @@ -810,13 +812,22 @@ static void stm32_usart_set_termios(struct uart_port *port, * CS8 or (CS7 + parity), 8 bits word aka [M1:M0] = 0b00 * M0 and M1 already cleared by cr1 initialization. */ - if (bits == 9) + if (bits == 9) { cr1 |= USART_CR1_M0; - else if ((bits == 7) && cfg->has_7bits_data) + } else if ((bits == 7) && cfg->has_7bits_data) { cr1 |= USART_CR1_M1; - else if (bits != 8) + } else if (bits != 8) { dev_dbg(port->dev, "Unsupported data bits config: %u bits\n" , bits); + cflag &= ~CSIZE; + cflag |= CS8; + termios->c_cflag = cflag; + bits = 8; + if (cflag & PARENB) { + bits++; + cr1 |= USART_CR1_M0; + } + } if (ofs->rtor != UNDEF_REG && (stm32_port->rx_ch || stm32_port->fifoen)) { diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 1a0c7beec101..0569d5949133 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1749,6 +1749,8 @@ static int hdlcdev_init(struct slgt_info *info) */ static void hdlcdev_exit(struct slgt_info *info) { + if (!info->netdev) + return; unregister_hdlc_device(info->netdev); free_netdev(info->netdev); info->netdev = NULL; diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 959f9e121cc6..7ca209d4e088 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -231,8 +231,10 @@ static void showacpu(void *dummy) unsigned long flags; /* Idle CPUs have no interesting backtrace. */ - if (idle_cpu(smp_processor_id())) + if (idle_cpu(smp_processor_id())) { + pr_info("CPU%d: backtrace skipped as idling\n", smp_processor_id()); return; + } raw_spin_lock_irqsave(&show_lock, flags); pr_info("CPU%d:\n", smp_processor_id()); @@ -259,10 +261,13 @@ static void sysrq_handle_showallcpus(int key) if (in_irq()) regs = get_irq_regs(); - if (regs) { - pr_info("CPU%d:\n", smp_processor_id()); + + pr_info("CPU%d:\n", smp_processor_id()); + if (regs) show_regs(regs); - } + else + show_stack(NULL, NULL, KERN_INFO); + schedule_work(&sysrq_showallcpus); } } diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 0fc473321d3e..5bbc2e010b48 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -172,7 +172,8 @@ static struct tty_buffer *tty_buffer_alloc(struct tty_port *port, size_t size) have queued and recycle that ? */ if (atomic_read(&port->buf.mem_used) > port->buf.mem_limit) return NULL; - p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC); + p = kmalloc(sizeof(struct tty_buffer) + 2 * size, + GFP_ATOMIC | __GFP_NOWARN); if (p == NULL) return NULL; @@ -393,27 +394,6 @@ int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) } EXPORT_SYMBOL(__tty_insert_flip_char); -/** - * tty_schedule_flip - push characters to ldisc - * @port: tty port to push from - * - * Takes any pending buffers and transfers their ownership to the - * ldisc side of the queue. It then schedules those characters for - * processing by the line discipline. - */ - -void tty_schedule_flip(struct tty_port *port) -{ - struct tty_bufhead *buf = &port->buf; - - /* paired w/ acquire in flush_to_ldisc(); ensures - * flush_to_ldisc() sees buffer data. - */ - smp_store_release(&buf->tail->commit, buf->tail->used); - queue_work(system_unbound_wq, &buf->work); -} -EXPORT_SYMBOL(tty_schedule_flip); - /** * tty_prepare_flip_string - make room for characters * @port: tty port @@ -543,6 +523,15 @@ static void flush_to_ldisc(struct work_struct *work) } +static inline void tty_flip_buffer_commit(struct tty_buffer *tail) +{ + /* + * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees + * buffer data. + */ + smp_store_release(&tail->commit, tail->used); +} + /** * tty_flip_buffer_push - terminal * @port: tty port to push @@ -556,10 +545,44 @@ static void flush_to_ldisc(struct work_struct *work) void tty_flip_buffer_push(struct tty_port *port) { - tty_schedule_flip(port); + struct tty_bufhead *buf = &port->buf; + + tty_flip_buffer_commit(buf->tail); + queue_work(system_unbound_wq, &buf->work); } EXPORT_SYMBOL(tty_flip_buffer_push); +/** + * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and + * push + * @port: tty port + * @chars: characters + * @size: size + * + * The function combines tty_insert_flip_string() and tty_flip_buffer_push() + * with the exception of properly holding the @port->lock. + * + * To be used only internally (by pty currently). + * + * Returns: the number added. + */ +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t size) +{ + struct tty_bufhead *buf = &port->buf; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + size = tty_insert_flip_string(port, chars, size); + if (size) + tty_flip_buffer_commit(buf->tail); + spin_unlock_irqrestore(&port->lock, flags); + + queue_work(system_unbound_wq, &buf->work); + + return size; +} + /** * tty_buffer_init - prepare a tty buffer structure * @port: tty port to initialise diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 78acc270e39a..aa0026a9839c 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -311,7 +311,7 @@ int kbd_rate(struct kbd_repeat *rpt) static void put_queue(struct vc_data *vc, int ch) { tty_insert_flip_char(&vc->port, ch, 0); - tty_schedule_flip(&vc->port); + tty_flip_buffer_push(&vc->port); } static void puts_queue(struct vc_data *vc, char *cp) @@ -320,7 +320,7 @@ static void puts_queue(struct vc_data *vc, char *cp) tty_insert_flip_char(&vc->port, *cp, 0); cp++; } - tty_schedule_flip(&vc->port); + tty_flip_buffer_push(&vc->port); } static void applkey(struct vc_data *vc, int key, char mode) @@ -565,7 +565,7 @@ static void fn_inc_console(struct vc_data *vc) static void fn_send_intr(struct vc_data *vc) { tty_insert_flip_char(&vc->port, 0, TTY_BREAK); - tty_schedule_flip(&vc->port); + tty_flip_buffer_push(&vc->port); } static void fn_scroll_forw(struct vc_data *vc) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index a7ee1171eeb3..2ebe73b116dc 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -855,7 +855,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr) unsigned short *p = (unsigned short *) vc->vc_pos; vc_uniscr_delete(vc, nr); - scr_memcpyw(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); + scr_memmovew(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); scr_memsetw(p + vc->vc_cols - vc->state.x - nr, vc->vc_video_erase_char, nr * 2); vc->vc_need_wrap = 0; @@ -1834,7 +1834,7 @@ static void csi_m(struct vc_data *vc) static void respond_string(const char *p, size_t len, struct tty_port *port) { tty_insert_flip_string(port, p, len); - tty_schedule_flip(port); + tty_flip_buffer_push(port); } static void cursor_report(struct vc_data *vc, struct tty_struct *tty) @@ -4625,16 +4625,8 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) if (op->data && font.charcount > op->charcount) rc = -ENOSPC; - if (!(op->flags & KD_FONT_FLAG_OLD)) { - if (font.width > op->width || font.height > op->height) - rc = -ENOSPC; - } else { - if (font.width != 8) - rc = -EIO; - else if ((op->height && font.height > op->height) || - font.height > 32) - rc = -ENOSPC; - } + if (font.width > op->width || font.height > op->height) + rc = -ENOSPC; if (rc) goto out; @@ -4662,7 +4654,7 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) return -EINVAL; if (op->charcount > 512) return -EINVAL; - if (op->width <= 0 || op->width > 32 || op->height > 32) + if (op->width <= 0 || op->width > 32 || !op->height || op->height > 32) return -EINVAL; size = (op->width+7)/8 * 32 * op->charcount; if (size > max_font_size) @@ -4672,31 +4664,6 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) if (IS_ERR(font.data)) return PTR_ERR(font.data); - if (!op->height) { /* Need to guess font height [compat] */ - int h, i; - u8 *charmap = font.data; - - /* - * If from KDFONTOP ioctl, don't allow things which can be done - * in userland,so that we can get rid of this soon - */ - if (!(op->flags & KD_FONT_FLAG_OLD)) { - kfree(font.data); - return -EINVAL; - } - - for (h = 32; h > 0; h--) - for (i = 0; i < op->charcount; i++) - if (charmap[32*i+h-1]) - goto nonzero; - - kfree(font.data); - return -EINVAL; - - nonzero: - op->height = h; - } - font.charcount = op->charcount; font.width = op->width; font.height = op->height; diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index a9c6ea8986af..b10b86e2c17e 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -486,70 +486,6 @@ static int vt_k_ioctl(struct tty_struct *tty, unsigned int cmd, return 0; } -static inline int do_fontx_ioctl(struct vc_data *vc, int cmd, - struct consolefontdesc __user *user_cfd, - struct console_font_op *op) -{ - struct consolefontdesc cfdarg; - int i; - - if (copy_from_user(&cfdarg, user_cfd, sizeof(struct consolefontdesc))) - return -EFAULT; - - switch (cmd) { - case PIO_FONTX: - op->op = KD_FONT_OP_SET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = cfdarg.chardata; - return con_font_op(vc, op); - - case GIO_FONTX: - op->op = KD_FONT_OP_GET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = cfdarg.chardata; - i = con_font_op(vc, op); - if (i) - return i; - cfdarg.charheight = op->height; - cfdarg.charcount = op->charcount; - if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc))) - return -EFAULT; - return 0; - } - return -EINVAL; -} - -static int vt_io_fontreset(struct vc_data *vc, struct console_font_op *op) -{ - int ret; - - if (__is_defined(BROKEN_GRAPHICS_PROGRAMS)) { - /* - * With BROKEN_GRAPHICS_PROGRAMS defined, the default font is - * not saved. - */ - return -ENOSYS; - } - - op->op = KD_FONT_OP_SET_DEFAULT; - op->data = NULL; - ret = con_font_op(vc, op); - if (ret) - return ret; - - console_lock(); - con_set_default_unimap(vc); - console_unlock(); - - return 0; -} - static inline int do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud, bool perm, struct vc_data *vc) { @@ -574,29 +510,7 @@ static inline int do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud, static int vt_io_ioctl(struct vc_data *vc, unsigned int cmd, void __user *up, bool perm) { - struct console_font_op op; /* used in multiple places here */ - switch (cmd) { - case PIO_FONT: - if (!perm) - return -EPERM; - op.op = KD_FONT_OP_SET; - op.flags = KD_FONT_FLAG_OLD | KD_FONT_FLAG_DONT_RECALC; /* Compatibility */ - op.width = 8; - op.height = 0; - op.charcount = 256; - op.data = up; - return con_font_op(vc, &op); - - case GIO_FONT: - op.op = KD_FONT_OP_GET; - op.flags = KD_FONT_FLAG_OLD; - op.width = 8; - op.height = 32; - op.charcount = 256; - op.data = up; - return con_font_op(vc, &op); - case PIO_CMAP: if (!perm) return -EPERM; @@ -605,20 +519,6 @@ static int vt_io_ioctl(struct vc_data *vc, unsigned int cmd, void __user *up, case GIO_CMAP: return con_get_cmap(up); - case PIO_FONTX: - if (!perm) - return -EPERM; - - fallthrough; - case GIO_FONTX: - return do_fontx_ioctl(vc, cmd, up, &op); - - case PIO_FONTRESET: - if (!perm) - return -EPERM; - - return vt_io_fontreset(vc, &op); - case PIO_SCRNMAP: if (!perm) return -EPERM; @@ -1099,54 +999,6 @@ void vc_SAK(struct work_struct *work) #ifdef CONFIG_COMPAT -struct compat_consolefontdesc { - unsigned short charcount; /* characters in font (256 or 512) */ - unsigned short charheight; /* scan lines per character (1-32) */ - compat_caddr_t chardata; /* font data in expanded form */ -}; - -static inline int -compat_fontx_ioctl(struct vc_data *vc, int cmd, - struct compat_consolefontdesc __user *user_cfd, - int perm, struct console_font_op *op) -{ - struct compat_consolefontdesc cfdarg; - int i; - - if (copy_from_user(&cfdarg, user_cfd, sizeof(struct compat_consolefontdesc))) - return -EFAULT; - - switch (cmd) { - case PIO_FONTX: - if (!perm) - return -EPERM; - op->op = KD_FONT_OP_SET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = compat_ptr(cfdarg.chardata); - return con_font_op(vc, op); - - case GIO_FONTX: - op->op = KD_FONT_OP_GET; - op->flags = KD_FONT_FLAG_OLD; - op->width = 8; - op->height = cfdarg.charheight; - op->charcount = cfdarg.charcount; - op->data = compat_ptr(cfdarg.chardata); - i = con_font_op(vc, op); - if (i) - return i; - cfdarg.charheight = op->height; - cfdarg.charcount = op->charcount; - if (copy_to_user(user_cfd, &cfdarg, sizeof(struct compat_consolefontdesc))) - return -EFAULT; - return 0; - } - return -EINVAL; -} - struct compat_console_font_op { compat_uint_t op; /* operation code KD_FONT_OP_* */ compat_uint_t flags; /* KD_FONT_FLAG_* */ @@ -1223,9 +1075,6 @@ long vt_compat_ioctl(struct tty_struct *tty, /* * these need special handlers for incompatible data structures */ - case PIO_FONTX: - case GIO_FONTX: - return compat_fontx_ioctl(vc, cmd, up, perm, &op); case KDFONTOP: return compat_kdfontop_ioctl(up, perm, &op, vc); diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 5f35cdd2cf1d..67d8da04848e 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1034,6 +1034,9 @@ isr_setup_status_complete(struct usb_ep *ep, struct usb_request *req) struct ci_hdrc *ci = req->context; unsigned long flags; + if (req->status < 0) + return; + if (ci->setaddr) { hw_usb_set_address(ci, ci->address); ci->setaddr = false; diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index ec0d6c50610c..eee78cbfaa72 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -614,10 +614,10 @@ const struct dev_pm_ops usb_hcd_pci_pm_ops = { .suspend_noirq = hcd_pci_suspend_noirq, .resume_noirq = hcd_pci_resume_noirq, .resume = hcd_pci_resume, - .freeze = check_root_hub_suspended, + .freeze = hcd_pci_suspend, .freeze_noirq = check_root_hub_suspended, .thaw_noirq = NULL, - .thaw = NULL, + .thaw = hcd_pci_resume, .poweroff = hcd_pci_suspend, .poweroff_noirq = hcd_pci_suspend_noirq, .restore_noirq = hcd_pci_resume_noirq, diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index ddd1d3eef912..bf5e37667697 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2661,6 +2661,7 @@ int usb_add_hcd(struct usb_hcd *hcd, { int retval; struct usb_device *rhdev; + struct usb_hcd *shared_hcd; if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) { hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); @@ -2817,13 +2818,26 @@ int usb_add_hcd(struct usb_hcd *hcd, goto err_hcd_driver_start; } - /* starting here, usbcore will pay attention to this root hub */ - retval = register_root_hub(hcd); - if (retval != 0) - goto err_register_root_hub; + /* starting here, usbcore will pay attention to the shared HCD roothub */ + shared_hcd = hcd->shared_hcd; + if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) { + retval = register_root_hub(shared_hcd); + if (retval != 0) + goto err_register_root_hub; - if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) - usb_hcd_poll_rh_status(hcd); + if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd)) + usb_hcd_poll_rh_status(shared_hcd); + } + + /* starting here, usbcore will pay attention to this root hub */ + if (!HCD_DEFER_RH_REGISTER(hcd)) { + retval = register_root_hub(hcd); + if (retval != 0) + goto err_register_root_hub; + + if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) + usb_hcd_poll_rh_status(hcd); + } return retval; @@ -2866,6 +2880,7 @@ EXPORT_SYMBOL_GPL(usb_add_hcd); void usb_remove_hcd(struct usb_hcd *hcd) { struct usb_device *rhdev = hcd->self.root_hub; + bool rh_registered; dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); @@ -2876,6 +2891,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) dev_dbg(hcd->self.controller, "roothub graceful disconnect\n"); spin_lock_irq (&hcd_root_hub_lock); + rh_registered = hcd->rh_registered; hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); @@ -2885,7 +2901,8 @@ void usb_remove_hcd(struct usb_hcd *hcd) cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); - usb_disconnect(&rhdev); /* Sets rhdev to NULL */ + if (rh_registered) + usb_disconnect(&rhdev); /* Sets rhdev to NULL */ mutex_unlock(&usb_bus_idr_lock); /* diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 7e5fd6afd9f4..f03ee889ecc7 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -511,6 +511,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, + /* DELL USB GEN2 */ + { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME }, + /* VCOM device */ { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index ec54971063f8..64485f82dc5b 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -4518,7 +4518,6 @@ static int dwc2_hsotg_udc_start(struct usb_gadget *gadget, WARN_ON(hsotg->driver); - driver->driver.bus = NULL; hsotg->driver = driver; hsotg->gadget.dev.of_node = hsotg->dev->of_node; hsotg->gadget.speed = USB_SPEED_UNKNOWN; diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 30919f741b7f..9279d3d3698c 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5076,7 +5076,7 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { retval = -EINVAL; - goto error1; + goto error2; } hcd->rsrc_start = res->start; hcd->rsrc_len = resource_size(res); diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 492977514d95..73c20a93208e 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -227,7 +227,7 @@ static void dwc3_pci_resume_work(struct work_struct *work) int ret; ret = pm_runtime_get_sync(&dwc3->dev); - if (ret) { + if (ret < 0) { pm_runtime_put_sync_autosuspend(&dwc3->dev); return; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 016050adf800..d769454ab482 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3365,14 +3365,14 @@ static bool dwc3_gadget_endpoint_trbs_complete(struct dwc3_ep *dep, struct dwc3 *dwc = dep->dwc; bool no_started_trb = true; - if (!dep->endpoint.desc) - return no_started_trb; - dwc3_gadget_ep_cleanup_completed_requests(dep, event, status); if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) goto out; + if (!dep->endpoint.desc) + return no_started_trb; + if (usb_endpoint_xfer_isoc(dep->endpoint.desc) && list_empty(&dep->started_list) && (list_empty(&dep->pending_list) || status == -EXDEV)) @@ -4204,7 +4204,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) } evt->count = 0; - evt->flags &= ~DWC3_EVENT_PENDING; ret = IRQ_HANDLED; /* Unmask interrupt */ @@ -4217,6 +4216,9 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval); } + /* Keep the clearing of DWC3_EVENT_PENDING at the end */ + evt->flags &= ~DWC3_EVENT_PENDING; + return ret; } diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index e1c7826f33e0..785e03fa1045 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -773,9 +773,13 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, dev->qmult = qmult; snprintf(net->name, sizeof(net->name), "%s%%d", netname); - if (get_ether_addr(dev_addr, net->dev_addr)) + if (get_ether_addr(dev_addr, net->dev_addr)) { + net->addr_assign_type = NET_ADDR_RANDOM; dev_warn(&g->dev, "using random %s ethernet address\n", "self"); + } else { + net->addr_assign_type = NET_ADDR_SET; + } if (get_ether_addr(host_addr, dev->host_mac)) dev_warn(&g->dev, "using random %s ethernet address\n", "host"); @@ -832,6 +836,9 @@ struct net_device *gether_setup_name_default(const char *netname) INIT_LIST_HEAD(&dev->tx_reqs); INIT_LIST_HEAD(&dev->rx_reqs); + /* by default we always have a random MAC address */ + net->addr_assign_type = NET_ADDR_RANDOM; + skb_queue_head_init(&dev->rx_frames); /* network device setup */ @@ -869,7 +876,6 @@ int gether_register_netdev(struct net_device *net) g = dev->gadget; memcpy(net->dev_addr, dev->dev_mac, ETH_ALEN); - net->addr_assign_type = NET_ADDR_RANDOM; status = register_netdev(net); if (status < 0) { @@ -909,6 +915,7 @@ int gether_set_dev_addr(struct net_device *net, const char *dev_addr) if (get_ether_addr(dev_addr, new_addr)) return -EINVAL; memcpy(dev->dev_mac, new_addr, ETH_ALEN); + net->addr_assign_type = NET_ADDR_SET; return 0; } EXPORT_SYMBOL_GPL(gether_set_dev_addr); diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 33efa6915b91..b496ca937dee 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,9 @@ MODULE_LICENSE("GPL"); /*----------------------------------------------------------------------*/ +static DEFINE_IDA(driver_id_numbers); +#define DRIVER_DRIVER_NAME_LENGTH_MAX 32 + #define RAW_EVENT_QUEUE_SIZE 16 struct raw_event_queue { @@ -144,6 +148,7 @@ enum dev_state { STATE_DEV_INVALID = 0, STATE_DEV_OPENED, STATE_DEV_INITIALIZED, + STATE_DEV_REGISTERING, STATE_DEV_RUNNING, STATE_DEV_CLOSED, STATE_DEV_FAILED @@ -159,6 +164,9 @@ struct raw_dev { /* Reference to misc device: */ struct device *dev; + /* Make driver names unique */ + int driver_id_number; + /* Protected by lock: */ enum dev_state state; bool gadget_registered; @@ -187,6 +195,7 @@ static struct raw_dev *dev_new(void) spin_lock_init(&dev->lock); init_completion(&dev->ep0_done); raw_event_queue_init(&dev->queue); + dev->driver_id_number = -1; return dev; } @@ -197,6 +206,9 @@ static void dev_free(struct kref *kref) kfree(dev->udc_name); kfree(dev->driver.udc_name); + kfree(dev->driver.driver.name); + if (dev->driver_id_number >= 0) + ida_free(&driver_id_numbers, dev->driver_id_number); if (dev->req) { if (dev->ep0_urb_queued) usb_ep_dequeue(dev->gadget->ep0, dev->req); @@ -417,9 +429,11 @@ static int raw_release(struct inode *inode, struct file *fd) static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) { int ret = 0; + int driver_id_number; struct usb_raw_init arg; char *udc_driver_name; char *udc_device_name; + char *driver_driver_name; unsigned long flags; if (copy_from_user(&arg, (void __user *)value, sizeof(arg))) @@ -438,36 +452,43 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) return -EINVAL; } + driver_id_number = ida_alloc(&driver_id_numbers, GFP_KERNEL); + if (driver_id_number < 0) + return driver_id_number; + + driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL); + if (!driver_driver_name) { + ret = -ENOMEM; + goto out_free_driver_id_number; + } + snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX, + DRIVER_NAME ".%d", driver_id_number); + udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); - if (!udc_driver_name) - return -ENOMEM; + if (!udc_driver_name) { + ret = -ENOMEM; + goto out_free_driver_driver_name; + } ret = strscpy(udc_driver_name, &arg.driver_name[0], UDC_NAME_LENGTH_MAX); - if (ret < 0) { - kfree(udc_driver_name); - return ret; - } + if (ret < 0) + goto out_free_udc_driver_name; ret = 0; udc_device_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_device_name) { - kfree(udc_driver_name); - return -ENOMEM; + ret = -ENOMEM; + goto out_free_udc_driver_name; } ret = strscpy(udc_device_name, &arg.device_name[0], UDC_NAME_LENGTH_MAX); - if (ret < 0) { - kfree(udc_driver_name); - kfree(udc_device_name); - return ret; - } + if (ret < 0) + goto out_free_udc_device_name; ret = 0; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_OPENED) { dev_dbg(dev->dev, "fail, device is not opened\n"); - kfree(udc_driver_name); - kfree(udc_device_name); ret = -EINVAL; goto out_unlock; } @@ -482,14 +503,25 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) dev->driver.suspend = gadget_suspend; dev->driver.resume = gadget_resume; dev->driver.reset = gadget_reset; - dev->driver.driver.name = DRIVER_NAME; + dev->driver.driver.name = driver_driver_name; dev->driver.udc_name = udc_device_name; dev->driver.match_existing_only = 1; + dev->driver_id_number = driver_id_number; dev->state = STATE_DEV_INITIALIZED; + spin_unlock_irqrestore(&dev->lock, flags); + return ret; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); +out_free_udc_device_name: + kfree(udc_device_name); +out_free_udc_driver_name: + kfree(udc_driver_name); +out_free_driver_driver_name: + kfree(driver_driver_name); +out_free_driver_id_number: + ida_free(&driver_id_numbers, driver_id_number); return ret; } @@ -507,6 +539,7 @@ static int raw_ioctl_run(struct raw_dev *dev, unsigned long value) ret = -EINVAL; goto out_unlock; } + dev->state = STATE_DEV_REGISTERING; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_gadget_probe_driver(&dev->driver); diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c index 3f1c62adce4b..314cb5ea061a 100644 --- a/drivers/usb/gadget/udc/lpc32xx_udc.c +++ b/drivers/usb/gadget/udc/lpc32xx_udc.c @@ -3015,6 +3015,7 @@ static int lpc32xx_udc_probe(struct platform_device *pdev) } udc->isp1301_i2c_client = isp1301_get_client(isp1301_node); + of_node_put(isp1301_node); if (!udc->isp1301_i2c_client) { return -EPROBE_DEFER; } diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index 3055d9abfec3..3e5c54742bef 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1541,10 +1541,12 @@ static int isp116x_remove(struct platform_device *pdev) iounmap(isp116x->data_reg); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - release_mem_region(res->start, 2); + if (res) + release_mem_region(res->start, 2); iounmap(isp116x->addr_reg); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, 2); + if (res) + release_mem_region(res->start, 2); usb_put_hcd(hcd); return 0; diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c index e832909a924f..6df2881cd7b9 100644 --- a/drivers/usb/host/oxu210hp-hcd.c +++ b/drivers/usb/host/oxu210hp-hcd.c @@ -3908,8 +3908,10 @@ static int oxu_bus_suspend(struct usb_hcd *hcd) } } + spin_unlock_irq(&oxu->lock); /* turn off now-idle HC */ del_timer_sync(&oxu->watchdog); + spin_lock_irq(&oxu->lock); ehci_halt(oxu); hcd->state = HC_STATE_SUSPENDED; diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 2d805349527d..b0f5b0c83e38 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -571,7 +571,7 @@ struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd) * It will release and re-aquire the lock while calling ACPI * method. */ -static void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, +void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, bool on, unsigned long *flags) __must_hold(&xhci->lock) { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 48e2c0474189..8952492d43be 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -59,7 +59,10 @@ #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI 0x464e #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed +#define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI 0xa71e +#define PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI 0x7ec0 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -263,7 +266,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f17d9dce4593..82eb4a65e068 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -774,6 +774,8 @@ static void xhci_stop(struct usb_hcd *hcd) void xhci_shutdown(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); + unsigned long flags; + int i; if (xhci->quirks & XHCI_SPURIOUS_REBOOT) usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev)); @@ -789,12 +791,21 @@ void xhci_shutdown(struct usb_hcd *hcd) del_timer_sync(&xhci->shared_hcd->rh_timer); } - spin_lock_irq(&xhci->lock); + spin_lock_irqsave(&xhci->lock, flags); xhci_halt(xhci); + + /* Power off USB2 ports*/ + for (i = 0; i < xhci->usb2_rhub.num_ports; i++) + xhci_set_port_power(xhci, xhci->main_hcd, i, false, &flags); + + /* Power off USB3 ports*/ + for (i = 0; i < xhci->usb3_rhub.num_ports; i++) + xhci_set_port_power(xhci, xhci->shared_hcd, i, false, &flags); + /* Workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) xhci_reset(xhci, XHCI_RESET_SHORT_USEC); - spin_unlock_irq(&xhci->lock); + spin_unlock_irqrestore(&xhci->lock, flags); xhci_cleanup_msix(xhci); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 111dc5ff3f0d..136144bcd265 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -2189,6 +2189,8 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, int xhci_hub_status_data(struct usb_hcd *hcd, char *buf); int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1); struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd); +void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index, + bool on, unsigned long *flags); void xhci_hc_died(struct xhci_hcd *xhci); diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 4232f1ce3fbf..1d435e4ee857 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -360,6 +360,7 @@ static int omap2430_probe(struct platform_device *pdev) control_node = of_parse_phandle(np, "ctrl-module", 0); if (control_node) { control_pdev = of_find_device_by_node(control_node); + of_node_put(control_node); if (!control_pdev) { dev_err(&pdev->dev, "Failed to get control device\n"); ret = -EINVAL; diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index b74621dc2a65..8f980fc6efc1 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1023,6 +1023,9 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, + /* Belimo Automation devices */ + { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) }, + { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) }, /* ICP DAS I-756xU devices */ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index d1a9564697a4..4e92c165c86b 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1568,6 +1568,12 @@ #define CHETCO_SEASMART_LITE_PID 0xA5AE /* SeaSmart Lite USB Adapter */ #define CHETCO_SEASMART_ANALOG_PID 0xA5AF /* SeaSmart Analog Adapter */ +/* + * Belimo Automation + */ +#define BELIMO_ZTH_PID 0x8050 +#define BELIMO_ZIP_PID 0xC811 + /* * Unjo AB */ diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index c327d4cf7928..03bcab3b9bd0 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -168,6 +168,7 @@ static const struct usb_device_id edgeport_2port_id_table[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) }, + { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) }, { } }; @@ -206,6 +207,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) }, + { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) }, { } }; diff --git a/drivers/usb/serial/io_usbvend.h b/drivers/usb/serial/io_usbvend.h index 52cbc353051f..9a6f742ad3ab 100644 --- a/drivers/usb/serial/io_usbvend.h +++ b/drivers/usb/serial/io_usbvend.h @@ -212,6 +212,7 @@ // // Definitions for other product IDs #define ION_DEVICE_ID_MT4X56USB 0x1403 // OEM device +#define ION_DEVICE_ID_E5805A 0x1A01 // OEM device (rebranded Edgeport/4) #define GENERATION_ID_FROM_USB_PRODUCT_ID(ProductId) \ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2eb4083c5b45..44e06b95584e 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -252,10 +252,12 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 +#define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200T 0x6026 +#define QUECTEL_PRODUCT_RM500K 0x7001 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -432,6 +434,8 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_CLS8 0x00b0 #define CINTERION_PRODUCT_MV31_MBIM 0x00b3 #define CINTERION_PRODUCT_MV31_RMNET 0x00b7 +#define CINTERION_PRODUCT_MV31_2_MBIM 0x00b8 +#define CINTERION_PRODUCT_MV31_2_RMNET 0x00b9 #define CINTERION_PRODUCT_MV32_WA 0x00f1 #define CINTERION_PRODUCT_MV32_WB 0x00f2 @@ -1132,17 +1136,22 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) }, /* EM160R-GL */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, 0x0700, 0xff), /* BG95 */ + .driver_info = RSVD(3) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), .driver_info = ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, @@ -1275,6 +1284,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */ { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), @@ -1977,6 +1987,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff), .driver_info = RSVD(0)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_MBIM, 0xff), + .driver_info = RSVD(3)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_RMNET, 0xff), + .driver_info = RSVD(0)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff), .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff), diff --git a/drivers/usb/storage/karma.c b/drivers/usb/storage/karma.c index 05cec81dcd3f..38ddfedef629 100644 --- a/drivers/usb/storage/karma.c +++ b/drivers/usb/storage/karma.c @@ -174,24 +174,25 @@ static void rio_karma_destructor(void *extra) static int rio_karma_init(struct us_data *us) { - int ret = 0; struct karma_data *data = kzalloc(sizeof(struct karma_data), GFP_NOIO); if (!data) - goto out; + return -ENOMEM; data->recv = kmalloc(RIO_RECV_LEN, GFP_NOIO); if (!data->recv) { kfree(data); - goto out; + return -ENOMEM; } us->extra = data; us->extra_destructor = rio_karma_destructor; - ret = rio_karma_send_command(RIO_ENTER_STORAGE, us); - data->in_storage = (ret == 0); -out: - return ret; + if (rio_karma_send_command(RIO_ENTER_STORAGE, us)) + return -EIO; + + data->in_storage = 1; + + return 0; } static struct scsi_host_template karma_host_template; diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 8277c2907b8e..4375aa843000 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1776,6 +1776,7 @@ void typec_set_pwr_opmode(struct typec_port *port, partner->usb_pd = 1; sysfs_notify(&partner_dev->kobj, NULL, "supports_usb_power_delivery"); + kobject_uevent(&partner_dev->kobj, KOBJ_CHANGE); } put_device(partner_dev); } diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c index b9035c3407b5..a6e5028105d4 100644 --- a/drivers/usb/typec/mux.c +++ b/drivers/usb/typec/mux.c @@ -127,8 +127,11 @@ typec_switch_register(struct device *parent, sw->dev.class = &typec_mux_class; sw->dev.type = &typec_switch_dev_type; sw->dev.driver_data = desc->drvdata; - dev_set_name(&sw->dev, "%s-switch", - desc->name ? desc->name : dev_name(parent)); + ret = dev_set_name(&sw->dev, "%s-switch", desc->name ? desc->name : dev_name(parent)); + if (ret) { + put_device(&sw->dev); + return ERR_PTR(ret); + } ret = device_add(&sw->dev); if (ret) { @@ -331,8 +334,11 @@ typec_mux_register(struct device *parent, const struct typec_mux_desc *desc) mux->dev.class = &typec_mux_class; mux->dev.type = &typec_mux_dev_type; mux->dev.driver_data = desc->drvdata; - dev_set_name(&mux->dev, "%s-mux", - desc->name ? desc->name : dev_name(parent)); + ret = dev_set_name(&mux->dev, "%s-mux", desc->name ? desc->name : dev_name(parent)); + if (ret) { + put_device(&mux->dev); + return ERR_PTR(ret); + } ret = device_add(&mux->dev); if (ret) { diff --git a/drivers/usb/typec/tcpm/Kconfig b/drivers/usb/typec/tcpm/Kconfig index 557f392fe24d..073fd2ea5e0b 100644 --- a/drivers/usb/typec/tcpm/Kconfig +++ b/drivers/usb/typec/tcpm/Kconfig @@ -56,7 +56,6 @@ config TYPEC_WCOVE tristate "Intel WhiskeyCove PMIC USB Type-C PHY driver" depends on ACPI depends on MFD_INTEL_PMC_BXT - depends on INTEL_SOC_PMIC depends on BXT_WC_PMIC_OPREGION help This driver adds support for USB Type-C on Intel Broxton platforms diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index d8d3892e5a69..3c6d452e3bf4 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -393,7 +393,6 @@ static int stub_probe(struct usb_device *udev) err_port: dev_set_drvdata(&udev->dev, NULL); - usb_put_dev(udev); /* we already have busid_priv, just lock busid_lock */ spin_lock(&busid_priv->busid_lock); @@ -408,6 +407,7 @@ static int stub_probe(struct usb_device *udev) put_busid_priv(busid_priv); sdev_free: + usb_put_dev(udev); stub_device_free(sdev); return rc; diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 325c22008e53..5dd41e8215e0 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -138,7 +138,9 @@ static int tweak_set_configuration_cmd(struct urb *urb) req = (struct usb_ctrlrequest *) urb->setup_packet; config = le16_to_cpu(req->wValue); + usb_lock_device(sdev->udev); err = usb_set_configuration(sdev->udev, config); + usb_unlock_device(sdev->udev); if (err && err != -ENODEV) dev_err(&sdev->udev->dev, "can't set config #%d, error %d\n", config, err); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index f2ad450db547..e65c0fa95d31 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -473,11 +473,14 @@ static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + bool old_ready; spin_lock(&vdpasim->lock); + old_ready = vq->ready; vq->ready = ready; - if (vq->ready) + if (vq->ready && !old_ready) { vdpasim_queue_ready(vdpasim, idx); + } spin_unlock(&vdpasim->lock); } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e303f6f073d2..5beb20768b20 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1450,13 +1450,9 @@ static struct socket *get_raw_socket(int fd) return ERR_PTR(r); } -static struct ptr_ring *get_tap_ptr_ring(int fd) +static struct ptr_ring *get_tap_ptr_ring(struct file *file) { struct ptr_ring *ring; - struct file *file = fget(fd); - - if (!file) - return NULL; ring = tun_get_tx_ring(file); if (!IS_ERR(ring)) goto out; @@ -1465,7 +1461,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd) goto out; ring = NULL; out: - fput(file); return ring; } @@ -1552,8 +1547,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = vhost_net_enable_vq(n, vq); if (r) goto err_used; - if (index == VHOST_NET_VQ_RX) - nvq->rx_ring = get_tap_ptr_ring(fd); + if (index == VHOST_NET_VQ_RX) { + if (sock) + nvq->rx_ring = get_tap_ptr_ring(sock->file); + else + nvq->rx_ring = NULL; + } oldubufs = nvq->ubufs; nvq->ubufs = ubufs; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index e4d60009d908..04578aa87e4d 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -97,8 +97,11 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) return; irq = ops->get_vq_irq(vdpa, qid); + if (irq < 0) + return; + irq_bypass_unregister_producer(&vq->call_ctx.producer); - if (!vq->call_ctx.ctx || irq < 0) + if (!vq->call_ctx.ctx) return; vq->call_ctx.producer.token = vq->call_ctx.ctx; diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 0bd7e64331f0..5a0340c85dc6 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -274,7 +274,7 @@ __vringh_iov(struct vringh *vrh, u16 i, int (*copy)(const struct vringh *vrh, void *dst, const void *src, size_t len)) { - int err, count = 0, up_next, desc_max; + int err, count = 0, indirect_count = 0, up_next, desc_max; struct vring_desc desc, *descs; struct vringh_range range = { -1ULL, 0 }, slowrange; bool slow = false; @@ -331,7 +331,12 @@ __vringh_iov(struct vringh *vrh, u16 i, continue; } - if (count++ == vrh->vring.num) { + if (up_next == -1) + count++; + else + indirect_count++; + + if (count > vrh->vring.num || indirect_count > desc_max) { vringh_bad("Descriptor loop in %p", descs); err = -ELOOP; goto fail; @@ -393,6 +398,7 @@ __vringh_iov(struct vringh *vrh, u16 i, i = return_from_indirect(vrh, &up_next, &descs, &desc_max); slow = false; + indirect_count = 0; } else break; } diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index 6a26a364f9bd..5fa12dc7c02c 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "../fbdev/sticore.h" @@ -1127,6 +1128,24 @@ int sti_call(const struct sti_struct *sti, unsigned long func, return ret; } +#if defined(CONFIG_FB_STI) +/* check if given fb_info is the primary device */ +int fb_is_primary_device(struct fb_info *info) +{ + struct sti_struct *sti; + + sti = sti_get_rom(0); + + /* if no built-in graphics card found, allow any fb driver as default */ + if (!sti) + return true; + + /* return true if it's the default built-in framebuffer driver */ + return (sti->info == info); +} +EXPORT_SYMBOL(fb_is_primary_device); +#endif + MODULE_AUTHOR("Philipp Rumpf, Helge Deller, Thomas Bogendoerfer"); MODULE_DESCRIPTION("Core STI driver for HP's NGLE series graphics cards in HP PARISC machines"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index 33595cc4778e..79efefd224f4 100644 --- a/drivers/video/fbdev/amba-clcd.c +++ b/drivers/video/fbdev/amba-clcd.c @@ -771,12 +771,15 @@ static int clcdfb_of_vram_setup(struct clcd_fb *fb) return -ENODEV; fb->fb.screen_base = of_iomap(memory, 0); - if (!fb->fb.screen_base) + if (!fb->fb.screen_base) { + of_node_put(memory); return -ENOMEM; + } fb->fb.fix.smem_start = of_translate_address(memory, of_get_address(memory, 0, &size, NULL)); fb->fb.fix.smem_len = size; + of_node_put(memory); return 0; } diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index f102519ccefb..76fedfd1b1b0 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2510,6 +2510,11 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, if (charcount != 256 && charcount != 512) return -EINVAL; + /* font bigger than screen resolution ? */ + if (w > FBCON_SWAP(info->var.rotate, info->var.xres, info->var.yres) || + h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres)) + return -EINVAL; + /* Make sure drawing engine can handle the font */ if (!(info->pixmap.blit_x & (1 << (font->width - 1))) || !(info->pixmap.blit_y & (1 << (font->height - 1)))) @@ -2771,6 +2776,34 @@ void fbcon_update_vcs(struct fb_info *info, bool all) } EXPORT_SYMBOL(fbcon_update_vcs); +/* let fbcon check if it supports a new screen resolution */ +int fbcon_modechange_possible(struct fb_info *info, struct fb_var_screeninfo *var) +{ + struct fbcon_ops *ops = info->fbcon_par; + struct vc_data *vc; + unsigned int i; + + WARN_CONSOLE_UNLOCKED(); + + if (!ops) + return 0; + + /* prevent setting a screen size which is smaller than font size */ + for (i = first_fb_vc; i <= last_fb_vc; i++) { + vc = vc_cons[i].d; + if (!vc || vc->vc_mode != KD_TEXT || + registered_fb[con2fb_map[i]] != info) + continue; + + if (vc->vc_font.width > FBCON_SWAP(var->rotate, var->xres, var->yres) || + vc->vc_font.height > FBCON_SWAP(var->rotate, var->yres, var->xres)) + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(fbcon_modechange_possible); + int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { @@ -3300,6 +3333,9 @@ static void fbcon_register_existing_fbs(struct work_struct *work) console_lock(); + deferred_takeover = false; + logo_shown = FBCON_LOGO_DONTSHOW; + for_each_registered_fb(i) fbcon_fb_registered(registered_fb[i]); @@ -3317,8 +3353,6 @@ static int fbcon_output_notifier(struct notifier_block *nb, pr_info("fbcon: Taking over console\n"); dummycon_unregister_output_notifier(&fbcon_output_nb); - deferred_takeover = false; - logo_shown = FBCON_LOGO_DONTSHOW; /* We may get called in atomic context */ schedule_work(&fbcon_deferred_takeover_work); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 00939ca2065a..d787a344b3b8 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -513,7 +513,7 @@ static int fb_show_logo_line(struct fb_info *info, int rotate, while (n && (n * (logo->width + 8) - 8 > xres)) --n; - image.dx = (xres - n * (logo->width + 8) - 8) / 2; + image.dx = (xres - (n * (logo->width + 8) - 8)) / 2; image.dy = y ?: (yres - logo->height) / 2; } else { image.dx = 0; @@ -1019,6 +1019,16 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) if (ret) return ret; + /* verify that virtual resolution >= physical resolution */ + if (var->xres_virtual < var->xres || + var->yres_virtual < var->yres) { + pr_warn("WARNING: fbcon: Driver '%s' missed to adjust virtual screen size (%ux%u vs. %ux%u)\n", + info->fix.id, + var->xres_virtual, var->yres_virtual, + var->xres, var->yres); + return -EINVAL; + } + if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW) return 0; @@ -1109,7 +1119,9 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EFAULT; console_lock(); lock_fb_info(info); - ret = fb_set_var(info, &var); + ret = fbcon_modechange_possible(info, &var); + if (!ret) + ret = fb_set_var(info, &var); if (!ret) fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); unlock_fb_info(info); diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 3c309ab20887..40baa79f8046 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1008,7 +1008,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) struct pci_dev *pdev = NULL; void __iomem *fb_virt; int gen2vm = efi_enabled(EFI_BOOT); - resource_size_t pot_start, pot_end; phys_addr_t paddr; int ret; @@ -1059,23 +1058,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) dio_fb_size = screen_width * screen_height * screen_depth / 8; - if (gen2vm) { - pot_start = 0; - pot_end = -1; - } else { - if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || - pci_resource_len(pdev, 0) < screen_fb_size) { - pr_err("Resource not available or (0x%lx < 0x%lx)\n", - (unsigned long) pci_resource_len(pdev, 0), - (unsigned long) screen_fb_size); - goto err1; - } - - pot_end = pci_resource_end(pdev, 0); - pot_start = pot_end - screen_fb_size + 1; - } - - ret = vmbus_allocate_mmio(&par->mem, hdev, pot_start, pot_end, + ret = vmbus_allocate_mmio(&par->mem, hdev, 0, -1, screen_fb_size, 0x100000, true); if (ret != 0) { pr_err("Unable to allocate framebuffer memory\n"); diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c index 4279e13a3b58..9421d14d0eb0 100644 --- a/drivers/video/fbdev/pxa3xx-gcu.c +++ b/drivers/video/fbdev/pxa3xx-gcu.c @@ -650,6 +650,7 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev) for (i = 0; i < 8; i++) { ret = pxa3xx_gcu_add_buffer(dev, priv); if (ret) { + pxa3xx_gcu_free_buffers(dev, priv); dev_err(dev, "failed to allocate DMA memory\n"); goto err_disable_clk; } @@ -666,15 +667,15 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev) SHARED_SIZE, irq); return 0; -err_free_dma: - dma_free_coherent(dev, SHARED_SIZE, - priv->shared, priv->shared_phys); +err_disable_clk: + clk_disable_unprepare(priv->clk); err_misc_deregister: misc_deregister(&priv->misc_dev); -err_disable_clk: - clk_disable_unprepare(priv->clk); +err_free_dma: + dma_free_coherent(dev, SHARED_SIZE, + priv->shared, priv->shared_phys); return ret; } @@ -687,6 +688,7 @@ static int pxa3xx_gcu_remove(struct platform_device *pdev) pxa3xx_gcu_wait_idle(priv); misc_deregister(&priv->misc_dev); dma_free_coherent(dev, SHARED_SIZE, priv->shared, priv->shared_phys); + clk_disable_unprepare(priv->clk); pxa3xx_gcu_free_buffers(dev, priv); return 0; diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 265865610edc..002f265d8db5 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -1317,11 +1317,11 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) goto out_err3; } + /* save for primary gfx device detection & unregister_framebuffer() */ + sti->info = info; if (register_framebuffer(&fb->info) < 0) goto out_err4; - sti->info = info; /* save for unregister_framebuffer() */ - fb_info(&fb->info, "%s %dx%d-%d frame buffer device, %s, id: %04x, mmio: 0x%04lx\n", fix->id, var->xres, diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index de73bd63f6b6..e8ef0c66e558 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -543,6 +544,28 @@ static const struct virtio_config_ops virtio_mmio_config_ops = { .get_shm_region = vm_get_shm_region, }; +#ifdef CONFIG_PM_SLEEP +static int virtio_mmio_freeze(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_freeze(&vm_dev->vdev); +} + +static int virtio_mmio_restore(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + if (vm_dev->version == 1) + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + + return virtio_device_restore(&vm_dev->vdev); +} + +static const struct dev_pm_ops virtio_mmio_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(virtio_mmio_freeze, virtio_mmio_restore) +}; +#endif static void virtio_mmio_release_dev(struct device *_d) { @@ -689,6 +712,7 @@ static int vm_cmdline_set(const char *device, if (!vm_cmdline_parent_registered) { err = device_register(&vm_cmdline_parent); if (err) { + put_device(&vm_cmdline_parent); pr_err("Failed to register parent device!\n"); return err; } @@ -763,26 +787,6 @@ static void vm_unregister_cmdline_devices(void) #endif -#ifdef CONFIG_PM_SLEEP -static int virtio_mmio_freeze(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev); - - return virtio_device_freeze(&vm_dev->vdev); -} - -static int virtio_mmio_restore(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev); - - return virtio_device_restore(&vm_dev->vdev); -} -#endif - -static SIMPLE_DEV_PM_OPS(virtio_mmio_pm_ops, virtio_mmio_freeze, virtio_mmio_restore); - /* Platform driver */ static const struct of_device_id virtio_mmio_match[] = { @@ -806,7 +810,9 @@ static struct platform_driver virtio_mmio_driver = { .name = "virtio-mmio", .of_match_table = virtio_mmio_match, .acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match), - .pm = &virtio_mmio_pm_ops, +#ifdef CONFIG_PM_SLEEP + .pm = &virtio_mmio_pm_ops, +#endif }, }; diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b35bb2d57f62..1e890ef17687 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -254,8 +254,7 @@ void vp_del_vqs(struct virtio_device *vdev) if (vp_dev->msix_affinity_masks) { for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + free_cpumask_var(vp_dev->msix_affinity_masks[i]); } if (vp_dev->msix_enabled) { diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index ae7f9357bb87..46c2a4bd9ebe 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -227,7 +227,7 @@ static int rti_wdt_probe(struct platform_device *pdev) pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); - if (ret) { + if (ret < 0) { pm_runtime_put_noidle(dev); pm_runtime_disable(&pdev->dev); return dev_err_probe(dev, ret, "runtime pm failed\n"); diff --git a/drivers/watchdog/ts4800_wdt.c b/drivers/watchdog/ts4800_wdt.c index c137ad2bd5c3..0ea554c7cda5 100644 --- a/drivers/watchdog/ts4800_wdt.c +++ b/drivers/watchdog/ts4800_wdt.c @@ -125,13 +125,16 @@ static int ts4800_wdt_probe(struct platform_device *pdev) ret = of_property_read_u32_index(np, "syscon", 1, ®); if (ret < 0) { dev_err(dev, "no offset in syscon\n"); + of_node_put(syscon_np); return ret; } /* allocate memory for watchdog struct */ wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); - if (!wdt) + if (!wdt) { + of_node_put(syscon_np); return -ENOMEM; + } /* set regmap and offset to know where to write */ wdt->feed_offset = reg; diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 3065dd670a18..c60723f5ed99 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -462,6 +462,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) return ret; watchdog_set_nowayout(&wdat->wdd, nowayout); + watchdog_stop_on_reboot(&wdat->wdd); return devm_watchdog_register_device(dev, &wdat->wdd); } diff --git a/drivers/xen/features.c b/drivers/xen/features.c index 25c053b09605..2c306de228db 100644 --- a/drivers/xen/features.c +++ b/drivers/xen/features.c @@ -29,6 +29,6 @@ void xen_setup_features(void) if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) break; for (j = 0; j < 32; j++) - xen_features[i * 32 + j] = !!(fi.submap & 1< #include #include +#include struct gntdev_dmabuf_priv; @@ -56,6 +57,7 @@ struct gntdev_grant_map { struct gnttab_unmap_grant_ref *unmap_ops; struct gnttab_map_grant_ref *kmap_ops; struct gnttab_unmap_grant_ref *kunmap_ops; + bool *being_removed; struct page **pages; unsigned long pages_vm_start; @@ -73,6 +75,11 @@ struct gntdev_grant_map { /* Needed to avoid allocation in gnttab_dma_free_pages(). */ xen_pfn_t *frames; #endif + + /* Number of live grants */ + atomic_t live_grants; + /* Needed to avoid allocation in __unmap_grant_pages */ + struct gntab_unmap_queue_data unmap_data; }; struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 54778aadf618..54fee4087bf1 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -60,10 +61,11 @@ module_param(limit, uint, 0644); MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by one mapping request"); +/* True in PV mode, false otherwise */ static int use_ptemod; -static int unmap_grant_pages(struct gntdev_grant_map *map, - int offset, int pages); +static void unmap_grant_pages(struct gntdev_grant_map *map, + int offset, int pages); static struct miscdevice gntdev_miscdev; @@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map) kvfree(map->unmap_ops); kvfree(map->kmap_ops); kvfree(map->kunmap_ops); + kvfree(map->being_removed); kfree(map); } @@ -140,12 +143,15 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, add->kunmap_ops = kvcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL); add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); + add->being_removed = + kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || NULL == add->kmap_ops || NULL == add->kunmap_ops || - NULL == add->pages) + NULL == add->pages || + NULL == add->being_removed) goto err; #ifdef CONFIG_XEN_GRANT_DMA_ALLOC @@ -240,9 +246,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) if (!refcount_dec_and_test(&map->users)) return; - if (map->pages && !use_ptemod) + if (map->pages && !use_ptemod) { + /* + * Increment the reference count. This ensures that the + * subsequent call to unmap_grant_pages() will not wind up + * re-entering itself. It *can* wind up calling + * gntdev_put_map() recursively, but such calls will be with a + * reference count greater than 1, so they will return before + * this code is reached. The recursion depth is thus limited to + * 1. Do NOT use refcount_inc() here, as it will detect that + * the reference count is zero and WARN(). + */ + refcount_set(&map->users, 1); + + /* + * Unmap the grants. This may or may not be asynchronous, so it + * is possible that the reference count is 1 on return, but it + * could also be greater than 1. + */ unmap_grant_pages(map, 0, map->count); + /* Check if the memory now needs to be freed */ + if (!refcount_dec_and_test(&map->users)) + return; + + /* + * All pages have been returned to the hypervisor, so free the + * map. + */ + } + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); @@ -288,6 +321,7 @@ static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data) int gntdev_map_grant_pages(struct gntdev_grant_map *map) { + size_t alloced = 0; int i, err = 0; if (!use_ptemod) { @@ -336,87 +370,110 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) map->pages, map->count); for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status == GNTST_okay) + if (map->map_ops[i].status == GNTST_okay) { map->unmap_ops[i].handle = map->map_ops[i].handle; - else if (!err) + if (!use_ptemod) + alloced++; + } else if (!err) err = -EINVAL; if (map->flags & GNTMAP_device_map) map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; if (use_ptemod) { - if (map->kmap_ops[i].status == GNTST_okay) + if (map->kmap_ops[i].status == GNTST_okay) { + if (map->map_ops[i].status == GNTST_okay) + alloced++; map->kunmap_ops[i].handle = map->kmap_ops[i].handle; - else if (!err) + } else if (!err) err = -EINVAL; } } + atomic_add(alloced, &map->live_grants); return err; } -static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void __unmap_grant_pages_done(int result, + struct gntab_unmap_queue_data *data) { - int i, err = 0; - struct gntab_unmap_queue_data unmap_data; + unsigned int i; + struct gntdev_grant_map *map = data->data; + unsigned int offset = data->unmap_ops - map->unmap_ops; - if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { - int pgno = (map->notify.addr >> PAGE_SHIFT); - if (pgno >= offset && pgno < offset + pages) { - /* No need for kmap, pages are in lowmem */ - uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); - tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; - map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; - } - } - - unmap_data.unmap_ops = map->unmap_ops + offset; - unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; - unmap_data.pages = map->pages + offset; - unmap_data.count = pages; - - err = gnttab_unmap_refs_sync(&unmap_data); - if (err) - return err; - - for (i = 0; i < pages; i++) { - if (map->unmap_ops[offset+i].status) - err = -EINVAL; + for (i = 0; i < data->count; i++) { + WARN_ON(map->unmap_ops[offset+i].status && + map->unmap_ops[offset+i].handle != -1); pr_debug("unmap handle=%d st=%d\n", map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = -1; } - return err; + /* + * Decrease the live-grant counter. This must happen after the loop to + * prevent premature reuse of the grants by gnttab_mmap(). + */ + atomic_sub(data->count, &map->live_grants); + + /* Release reference taken by __unmap_grant_pages */ + gntdev_put_map(NULL, map); } -static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) { - int range, err = 0; + if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { + int pgno = (map->notify.addr >> PAGE_SHIFT); + + if (pgno >= offset && pgno < offset + pages) { + /* No need for kmap, pages are in lowmem */ + uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); + + tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; + map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; + } + } + + map->unmap_data.unmap_ops = map->unmap_ops + offset; + map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; + map->unmap_data.pages = map->pages + offset; + map->unmap_data.count = pages; + map->unmap_data.done = __unmap_grant_pages_done; + map->unmap_data.data = map; + refcount_inc(&map->users); /* to keep map alive during async call below */ + + gnttab_unmap_refs_async(&map->unmap_data); +} + +static void unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) +{ + int range; + + if (atomic_read(&map->live_grants) == 0) + return; /* Nothing to do */ pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); /* It is possible the requested range will have a "hole" where we * already unmapped some of the grants. Only unmap valid ranges. */ - while (pages && !err) { - while (pages && map->unmap_ops[offset].handle == -1) { + while (pages) { + while (pages && map->being_removed[offset]) { offset++; pages--; } range = 0; while (range < pages) { - if (map->unmap_ops[offset+range].handle == -1) + if (map->being_removed[offset + range]) break; + map->being_removed[offset + range] = true; range++; } - err = __unmap_grant_pages(map, offset, range); + if (range) + __unmap_grant_pages(map, offset, range); offset += range; pages -= range; } - - return err; } /* ------------------------------------------------------------------ */ @@ -468,7 +525,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, struct gntdev_grant_map *map = container_of(mn, struct gntdev_grant_map, notifier); unsigned long mstart, mend; - int err; if (!mmu_notifier_range_blockable(range)) return false; @@ -489,10 +545,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, map->index, map->count, map->vma->vm_start, map->vma->vm_end, range->start, range->end, mstart, mend); - err = unmap_grant_pages(map, + unmap_grant_pages(map, (mstart - map->vma->vm_start) >> PAGE_SHIFT, (mend - mstart) >> PAGE_SHIFT); - WARN_ON(err); return true; } @@ -980,6 +1035,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; if (use_ptemod && map->vma) goto unlock_out; + if (atomic_read(&map->live_grants)) { + err = -EAGAIN; + goto unlock_out; + } refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 34742c6e189e..f17c4c03db30 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -261,7 +261,6 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, return 0; } -EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages); struct remap_pfn { struct mm_struct *mm; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a13ef836fe4e..84f3a6405b55 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -657,14 +657,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, if (stat->st_result_mask & P9_STATS_NLINK) set_nlink(inode, stat->st_nlink); if (stat->st_result_mask & P9_STATS_MODE) { - inode->i_mode = stat->st_mode; - if ((S_ISBLK(inode->i_mode)) || - (S_ISCHR(inode->i_mode))) - init_special_inode(inode, inode->i_mode, - inode->i_rdev); + mode = stat->st_mode & S_IALLUGO; + mode |= inode->i_mode & ~S_IALLUGO; + inode->i_mode = mode; } - if (stat->st_result_mask & P9_STATS_RDEV) - inode->i_rdev = new_decode_dev(stat->st_rdev); if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && stat->st_result_mask & P9_STATS_SIZE) v9fs_i_size_write(inode, stat->st_size); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 262c0ae505af..159795059547 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -412,8 +412,11 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, } /* skip if starts before the current position */ - if (offset < curr) + if (offset < curr) { + if (next > curr) + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); continue; + } /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, diff --git a/fs/afs/inode.c b/fs/afs/inode.c index f81a972bdd29..826fae22a8cc 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -729,10 +729,23 @@ int afs_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct afs_vnode *vnode = AFS_FS_I(inode); - int seq = 0; + struct key *key; + int ret, seq = 0; _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); + if (vnode->volume && + !(query_flags & AT_STATX_DONT_SYNC) && + !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + ret = afs_validate(vnode, key); + key_put(key); + if (ret < 0) + return ret; + } + do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); generic_fillattr(inode, stat); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 8be709cb8542..efe0fb3ad8bd 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -572,6 +572,8 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENODATA: case -EBADMSG: case -EMSGSIZE: + case -ENOMEM: + case -EFAULT: abort_code = RXGEN_CC_UNMARSHAL; if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; @@ -579,7 +581,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code, ret, "KUM"); goto local_abort; default: - abort_code = RX_USER_ABORT; + abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KER"); goto local_abort; @@ -871,7 +873,7 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RX_USER_ABORT, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); fallthrough; default: _leave(" [error]"); @@ -913,7 +915,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RX_USER_ABORT, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); } _leave(" [error]"); } diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index b9c658e0548e..69f4db05191a 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -427,6 +427,30 @@ static void old_reloc(unsigned long rl) /****************************************************************************/ +static inline u32 __user *skip_got_header(u32 __user *rp) +{ + if (IS_ENABLED(CONFIG_RISCV)) { + /* + * RISC-V has a 16 byte GOT PLT header for elf64-riscv + * and 8 byte GOT PLT header for elf32-riscv. + * Skip the whole GOT PLT header, since it is reserved + * for the dynamic linker (ld.so). + */ + u32 rp_val0, rp_val1; + + if (get_user(rp_val0, rp)) + return rp; + if (get_user(rp_val1, rp + 1)) + return rp; + + if (rp_val0 == 0xffffffff && rp_val1 == 0xffffffff) + rp += 4; + else if (rp_val0 == 0xffffffff) + rp += 2; + } + return rp; +} + static int load_flat_file(struct linux_binprm *bprm, struct lib_info *libinfo, int id, unsigned long *extra_stack) { @@ -774,7 +798,8 @@ static int load_flat_file(struct linux_binprm *bprm, * image. */ if (flags & FLAT_FLAG_GOTPIC) { - for (rp = (u32 __user *)datapos; ; rp++) { + rp = skip_got_header((u32 __user *) datapos); + for (; ; rp++) { u32 addr, rp_val; if (get_user(rp_val, rp)) return -EFAULT; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87e55b024ac2..35acdab56a1c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3061,7 +3061,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ~BTRFS_FEATURE_INCOMPAT_SUPP; if (features) { btrfs_err(fs_info, - "cannot mount because of unsupported optional features (%llx)", + "cannot mount because of unsupported optional features (0x%llx)", features); err = -EINVAL; goto fail_alloc; @@ -3099,7 +3099,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ~BTRFS_FEATURE_COMPAT_RO_SUPP; if (!sb_rdonly(sb) && features) { btrfs_err(fs_info, - "cannot mount read-write because of unsupported optional features (%llx)", + "cannot mount read-write because of unsupported optional features (0x%llx)", features); err = -EINVAL; goto fail_alloc; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4a5248097d7a..779b7745cdc4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7480,7 +7480,19 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || em->block_start == EXTENT_MAP_INLINE) { free_extent_map(em); - ret = -ENOTBLK; + /* + * If we are in a NOWAIT context, return -EAGAIN in order to + * fallback to buffered IO. This is not only because we can + * block with buffered IO (no support for NOWAIT semantics at + * the moment) but also to avoid returning short reads to user + * space - this happens if we were able to read some data from + * previous non-compressed extents and then when we fallback to + * buffered IO, at btrfs_file_read_iter() by calling + * filemap_read(), we fail to fault in pages for the read buffer, + * in which case filemap_read() returns a short read (the number + * of bytes previously read is > 0, so it does not return -EFAULT). + */ + ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK; goto unlock_err; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b5d2005d3415..310f18d5624c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -652,6 +652,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, compress_force = false; no_compress++; } else { + btrfs_err(info, "unrecognized compression value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -710,8 +712,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_thread_pool: ret = match_int(&args[0], &intarg); if (ret) { + btrfs_err(info, "unrecognized thread_pool value %s", + args[0].from); goto out; } else if (intarg == 0) { + btrfs_err(info, "invalid value 0 for thread_pool"); ret = -EINVAL; goto out; } @@ -772,8 +777,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_ratio: ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, "unrecognized metadata_ratio value %s", + args[0].from); goto out; + } info->metadata_ratio = intarg; btrfs_info(info, "metadata ratio %u", info->metadata_ratio); @@ -790,6 +798,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_and_info(info, DISCARD_ASYNC, "turning on async discard"); } else { + btrfs_err(info, "unrecognized discard mode value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -814,6 +824,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_and_info(info, FREE_SPACE_TREE, "enabling free space tree"); } else { + btrfs_err(info, "unrecognized space_cache value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -889,8 +901,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_check_integrity_print_mask: ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, + "unrecognized check_integrity_print_mask value %s", + args[0].from); goto out; + } info->check_integrity_print_mask = intarg; btrfs_info(info, "check_integrity_print_mask 0x%x", info->check_integrity_print_mask); @@ -905,13 +921,15 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, goto out; #endif case Opt_fatal_errors: - if (strcmp(args[0].from, "panic") == 0) + if (strcmp(args[0].from, "panic") == 0) { btrfs_set_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); - else if (strcmp(args[0].from, "bug") == 0) + } else if (strcmp(args[0].from, "bug") == 0) { btrfs_clear_opt(info->mount_opt, PANIC_ON_FATAL_ERROR); - else { + } else { + btrfs_err(info, "unrecognized fatal_errors value %s", + args[0].from); ret = -EINVAL; goto out; } @@ -919,8 +937,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_commit_interval: intarg = 0; ret = match_int(&args[0], &intarg); - if (ret) + if (ret) { + btrfs_err(info, "unrecognized commit_interval value %s", + args[0].from); + ret = -EINVAL; goto out; + } if (intarg == 0) { btrfs_info(info, "using default commit interval %us", @@ -934,8 +956,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, break; case Opt_rescue: ret = parse_rescue_options(info, args[0].from); - if (ret < 0) + if (ret < 0) { + btrfs_err(info, "unrecognized rescue value %s", + args[0].from); goto out; + } break; #ifdef CONFIG_BTRFS_DEBUG case Opt_fragment_all: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b96eeb724a74..de7879ca512a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7190,12 +7190,12 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) * do another round of validation checks. */ if (total_dev != fs_info->fs_devices->total_devices) { - btrfs_err(fs_info, - "super_num_devices %llu mismatch with num_devices %llu found here", + btrfs_warn(fs_info, +"super block num_devices %llu mismatch with DEV_ITEM count %llu, will be repaired on next transaction commit", btrfs_super_num_devices(fs_info->super_copy), total_dev); - ret = -EINVAL; - goto error; + fs_info->fs_devices->total_devices = total_dev; + btrfs_set_super_num_devices(fs_info->super_copy, total_dev); } if (btrfs_super_total_bytes(fs_info->super_copy) < fs_info->fs_devices->total_rw_bytes) { diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2a3981337077..e4ba1ef15490 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -317,6 +317,14 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val, } #define XATTR_RSTAT_FIELD(_type, _name) \ XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT) +#define XATTR_RSTAT_FIELD_UPDATABLE(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .exists_cb = NULL, \ + .flags = VXATTR_FLAG_RSTAT, \ + } #define XATTR_LAYOUT_FIELD(_type, _name, _field) \ { \ .name = CEPH_XATTR_NAME2(_type, _name, _field), \ @@ -354,7 +362,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_RSTAT_FIELD(dir, rfiles), XATTR_RSTAT_FIELD(dir, rsubdirs), XATTR_RSTAT_FIELD(dir, rbytes), - XATTR_RSTAT_FIELD(dir, rctime), + XATTR_RSTAT_FIELD_UPDATABLE(dir, rctime), { .name = "ceph.dir.pin", .name_size = sizeof("ceph.dir.pin"), diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3e32160896eb..5a02c7adef09 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1033,7 +1033,7 @@ struct file_system_type cifs_fs_type = { }; MODULE_ALIAS_FS("cifs"); -static struct file_system_type smb3_fs_type = { +struct file_system_type smb3_fs_type = { .owner = THIS_MODULE, .name = "smb3", .mount = smb3_do_mount, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 905d03863721..e996f0bef414 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -51,7 +51,7 @@ static inline unsigned long cifs_get_time(struct dentry *dentry) return (unsigned long) dentry->d_fsdata; } -extern struct file_system_type cifs_fs_type; +extern struct file_system_type cifs_fs_type, smb3_fs_type; extern const struct address_space_operations cifs_addr_ops; extern const struct address_space_operations cifs_addr_ops_smallbuf; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6599069be690..196285b0fe46 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1982,11 +1982,13 @@ extern mempool_t *cifs_mid_poolp; /* Operations for different SMB versions */ #define SMB1_VERSION_STRING "1.0" +#define SMB20_VERSION_STRING "2.0" +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; -#define SMB20_VERSION_STRING "2.0" extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1c14cf01dbef..9d740916a8ee 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1053,18 +1053,23 @@ static struct super_block *__cifs_get_super(void (*f)(struct super_block *, void .data = data, .sb = NULL, }; + struct file_system_type **fs_type = (struct file_system_type *[]) { + &cifs_fs_type, &smb3_fs_type, NULL, + }; - iterate_supers_type(&cifs_fs_type, f, &sd); - - if (!sd.sb) - return ERR_PTR(-EINVAL); - /* - * Grab an active reference in order to prevent automounts (DFS links) - * of expiring and then freeing up our cifs superblock pointer while - * we're doing failover. - */ - cifs_sb_active(sd.sb); - return sd.sb; + for (; *fs_type; fs_type++) { + iterate_supers_type(*fs_type, f, &sd); + if (sd.sb) { + /* + * Grab an active reference in order to prevent automounts (DFS links) + * of expiring and then freeing up our cifs superblock pointer while + * we're doing failover. + */ + cifs_sb_active(sd.sb); + return sd.sb; + } + } + return ERR_PTR(-EINVAL); } static void __cifs_put_super(struct super_block *sb) diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index a718dc77e604..97cd4df04060 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -371,8 +371,6 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, num_rqst++; if (cfile) { - cifsFileInfo_put(cfile); - cfile = NULL; rc = compound_send_recv(xid, ses, server, flags, num_rqst - 2, &rqst[1], &resp_buftype[1], diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c758ff41b638..b855abfaaf87 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3632,7 +3632,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, if (rc) goto out; - if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) + if (cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) smb2_set_sparse(xid, tcon, cfile, inode, false); eof = cpu_to_le64(off + len); @@ -4032,11 +4032,13 @@ smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, } } +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY static bool smb2_is_read_op(__u32 oplock) { return oplock == SMB2_OPLOCK_LEVEL_II; } +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ static bool smb21_is_read_op(__u32 oplock) @@ -5122,7 +5124,7 @@ smb2_make_node(unsigned int xid, struct inode *inode, return rc; } - +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -5220,6 +5222,7 @@ struct smb_version_operations smb20_operations = { .llseek = smb3_llseek, .is_status_io_timeout = smb2_is_status_io_timeout, }; +#endif /* CIFS_ALLOW_INSECURE_LEGACY */ struct smb_version_operations smb21_operations = { .compare_fids = smb2_compare_fids, @@ -5548,6 +5551,7 @@ struct smb_version_operations smb311_operations = { .is_status_io_timeout = smb2_is_status_io_timeout, }; +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY struct smb_version_values smb20_values = { .version_string = SMB20_VERSION_STRING, .protocol_id = SMB20_PROT_ID, @@ -5568,6 +5572,7 @@ struct smb_version_values smb20_values = { .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, .create_lease_size = sizeof(struct create_lease), }; +#endif /* ALLOW_INSECURE_LEGACY */ struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 88554b640b0d..24dd711fa9b9 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -281,6 +281,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, ses->binding_chan = NULL; mutex_unlock(&tcon->ses->session_mutex); goto failed; + } else if (rc) { + mutex_unlock(&ses->session_mutex); + goto out; } } /* diff --git a/fs/dax.c b/fs/dax.c index d5d7b9393bca..3e7e9a57fd28 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -846,7 +846,8 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index, if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp)) goto unlock_pmd; - flush_cache_page(vma, address, pfn); + flush_cache_range(vma, address, + address + HPAGE_PMD_SIZE); pmd = pmdp_invalidate(vma, address, pmdp); pmd = pmd_wrprotect(pmd); pmd = pmd_mkclean(pmd); diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 1e9d8999b939..eaa28d654e9f 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1551,6 +1551,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, lkb->lkb_wait_type = 0; lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; lkb->lkb_wait_count--; + unhold_lkb(lkb); goto out_del; } @@ -1577,6 +1578,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, log_error(ls, "remwait error %x reply %d wait_type %d overlap", lkb->lkb_id, mstype, lkb->lkb_wait_type); lkb->lkb_wait_count--; + unhold_lkb(lkb); lkb->lkb_wait_type = 0; } @@ -4065,13 +4067,14 @@ static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len) rv = _create_message(ls, sizeof(struct dlm_message) + len, dir_nodeid, DLM_MSG_REMOVE, &ms, &mh); if (rv) - return; + goto out; memcpy(ms->m_extra, name, len); ms->m_hash = hash; send_message(mh, ms); +out: spin_lock(&ls->ls_remove_spin); ls->ls_remove_len = 0; memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); @@ -5312,11 +5315,16 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; lkb->lkb_wait_type = 0; - lkb->lkb_wait_count = 0; + /* drop all wait_count references we still + * hold a reference for this iteration. + */ + while (lkb->lkb_wait_count) { + lkb->lkb_wait_count--; + unhold_lkb(lkb); + } mutex_lock(&ls->ls_waiters_mutex); list_del_init(&lkb->lkb_wait_reply); mutex_unlock(&ls->ls_waiters_mutex); - unhold_lkb(lkb); /* for waiters list */ if (oc || ou) { /* do an unlock or cancel instead of resending */ diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index c38b2b8ffd1d..a10d2bcfe75a 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -23,11 +23,11 @@ struct plock_op { struct list_head list; int done; struct dlm_plock_info info; + int (*callback)(struct file_lock *fl, int result); }; struct plock_xop { struct plock_op xop; - int (*callback)(struct file_lock *fl, int result); void *fl; void *file; struct file_lock flc; @@ -129,19 +129,18 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, /* fl_owner is lockd which doesn't distinguish processes on the nfs client */ op->info.owner = (__u64) fl->fl_pid; - xop->callback = fl->fl_lmops->lm_grant; + op->callback = fl->fl_lmops->lm_grant; locks_init_lock(&xop->flc); locks_copy_lock(&xop->flc, fl); xop->fl = fl; xop->file = file; } else { op->info.owner = (__u64)(long) fl->fl_owner; - xop->callback = NULL; } send_op(op); - if (xop->callback == NULL) { + if (!op->callback) { rv = wait_event_interruptible(recv_wq, (op->done != 0)); if (rv == -ERESTARTSYS) { log_debug(ls, "dlm_posix_lock: wait killed %llx", @@ -203,7 +202,7 @@ static int dlm_plock_callback(struct plock_op *op) file = xop->file; flc = &xop->flc; fl = xop->fl; - notify = xop->callback; + notify = op->callback; if (op->info.rv) { notify(fl, op->info.rv); @@ -436,10 +435,9 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, if (op->info.fsid == info.fsid && op->info.number == info.number && op->info.owner == info.owner) { - struct plock_xop *xop = (struct plock_xop *)op; list_del_init(&op->list); memcpy(&op->info, &info, sizeof(info)); - if (xop->callback) + if (op->callback) do_callback = 1; else op->done = 1; diff --git a/fs/exec.c b/fs/exec.c index b750bd9a0d56..07603501b105 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1288,7 +1288,7 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; #ifdef CONFIG_POSIX_TIMERS - exit_itimers(me->signal); + exit_itimers(me); flush_itimer_signals(); #endif diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 6232c0958384..da2c94c652b0 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -148,7 +148,9 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync) struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); - WARN_ON(clu < EXFAT_FIRST_CLUSTER); + if (!is_valid_cluster(sbi, clu)) + return -EINVAL; + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); @@ -166,7 +168,9 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync) struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_mount_options *opts = &sbi->options; - WARN_ON(clu < EXFAT_FIRST_CLUSTER); + if (!is_valid_cluster(sbi, clu)) + return; + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index d5cdc9175326..2aa02a7c643a 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -380,6 +380,14 @@ static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi, EXFAT_RESERVED_CLUSTERS; } +static inline bool is_valid_cluster(struct exfat_sb_info *sbi, + unsigned int clus) +{ + if (clus < EXFAT_FIRST_CLUSTER || sbi->num_clusters <= clus) + return false; + return true; +} + /* super.c */ int exfat_set_volume_dirty(struct super_block *sb); int exfat_clear_volume_dirty(struct super_block *sb); diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index dcc103b27dbe..78aa02e2d2c1 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -81,14 +81,6 @@ int exfat_ent_set(struct super_block *sb, unsigned int loc, return 0; } -static inline bool is_valid_cluster(struct exfat_sb_info *sbi, - unsigned int clus) -{ - if (clus < EXFAT_FIRST_CLUSTER || sbi->num_clusters <= clus) - return false; - return true; -} - int exfat_ent_get(struct super_block *sb, unsigned int loc, unsigned int *content) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8372a124d1fc..666ea80b3cf3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1419,12 +1419,6 @@ struct ext4_super_block { #ifdef __KERNEL__ -#ifdef CONFIG_FS_ENCRYPTION -#define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL) -#else -#define DUMMY_ENCRYPTION_ENABLED(sbi) (0) -#endif - /* Number of quota types we support */ #define EXT4_MAXQUOTAS 3 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 80b876ab6b1f..0f49bf547b84 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -371,7 +371,7 @@ static int ext4_valid_extent_entries(struct inode *inode, { unsigned short entries; ext4_lblk_t lblock = 0; - ext4_lblk_t prev = 0; + ext4_lblk_t cur = 0; if (eh->eh_entries == 0) return 1; @@ -395,11 +395,11 @@ static int ext4_valid_extent_entries(struct inode *inode, /* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); - if ((lblock <= prev) && prev) { + if (lblock < cur) { *pblk = ext4_ext_pblock(ext); return 0; } - prev = lblock + ext4_ext_get_actual_len(ext) - 1; + cur = lblock + ext4_ext_get_actual_len(ext); ext++; entries--; } @@ -419,13 +419,13 @@ static int ext4_valid_extent_entries(struct inode *inode, /* Check for overlapping index extents */ lblock = le32_to_cpu(ext_idx->ei_block); - if ((lblock <= prev) && prev) { + if (lblock < cur) { *pblk = ext4_idx_pblock(ext_idx); return 0; } ext_idx++; entries--; - prev = lblock; + cur = lblock + 1; } } return 1; @@ -4691,16 +4691,17 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) return -EOPNOTSUPP; ext4_fc_start_update(inode); + inode_lock(inode); + ret = ext4_convert_inline_data(inode); + inode_unlock(inode); + if (ret) + goto exit; if (mode & FALLOC_FL_PUNCH_HOLE) { ret = ext4_punch_hole(file, offset, len); goto exit; } - ret = ext4_convert_inline_data(inode); - if (ret) - goto exit; - if (mode & FALLOC_FL_COLLAPSE_RANGE) { ret = ext4_collapse_range(file, offset, len); goto exit; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 58ce2b0c90bd..80bdc5868599 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1993,6 +1993,18 @@ int ext4_convert_inline_data(struct inode *inode) if (!ext4_has_inline_data(inode)) { ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); return 0; + } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { + /* + * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is + * cleared. This means we are in the middle of moving of + * inline data to delay allocated block. Just force writeout + * here to finish conversion. + */ + error = filemap_flush(inode->i_mapping); + if (error) + return error; + if (!ext4_has_inline_data(inode)) + return 0; } needed_blocks = ext4_writepage_trans_blocks(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f4a3ce46f488..58b9180a5b01 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4074,15 +4074,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) trace_ext4_punch_hole(inode, offset, length, 0); - ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); - if (ext4_has_inline_data(inode)) { - down_write(&EXT4_I(inode)->i_mmap_sem); - ret = ext4_convert_inline_data(inode); - up_write(&EXT4_I(inode)->i_mmap_sem); - if (ret) - return ret; - } - /* * Write out all dirty pages to avoid race conditions * Then release them. @@ -5476,6 +5467,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { handle_t *handle; loff_t oldsize = inode->i_size; + loff_t old_disksize; int shrink = (attr->ia_size < inode->i_size); if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { @@ -5549,6 +5541,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) inode->i_sb->s_blocksize_bits); down_write(&EXT4_I(inode)->i_data_sem); + old_disksize = EXT4_I(inode)->i_disksize; EXT4_I(inode)->i_disksize = attr->ia_size; rc = ext4_mark_inode_dirty(handle, inode); if (!error) @@ -5560,6 +5553,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) */ if (!error) i_size_write(inode, attr->ia_size); + else + EXT4_I(inode)->i_disksize = old_disksize; up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); if (error) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 15223b5a3af9..c32d0895c3a3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3520,6 +3520,15 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, size = size >> bsbits; start = start_off >> bsbits; + /* + * For tiny groups (smaller than 8MB) the chosen allocation + * alignment may be larger than group size. Make sure the + * alignment does not move allocation to a different group which + * makes mballoc fail assertions later. + */ + start = max(start, rounddown(ac->ac_o_ex.fe_logical, + (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb))); + /* don't cover already allocated blocks in selected range */ if (ar->pleft && start <= ar->lleft) { size -= ar->lleft + 1 - start; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b6c7f0dfc1e8..f1a4c3eaa768 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -757,12 +757,14 @@ static struct dx_frame * dx_probe(struct ext4_filename *fname, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in) { - unsigned count, indirect; + unsigned count, indirect, level, i; struct dx_entry *at, *entries, *p, *q, *m; struct dx_root *root; struct dx_frame *frame = frame_in; struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); u32 hash; + ext4_lblk_t block; + ext4_lblk_t blocks[EXT4_HTREE_LEVEL]; memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); frame->bh = ext4_read_dirblock(dir, 0, INDEX); @@ -834,6 +836,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, } dxtrace(printk("Look up %x", hash)); + level = 0; + blocks[0] = 0; while (1) { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { @@ -875,15 +879,27 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, dx_get_block(at))); frame->entries = entries; frame->at = at; - if (!indirect--) + + block = dx_get_block(at); + for (i = 0; i <= level; i++) { + if (blocks[i] == block) { + ext4_warning_inode(dir, + "dx entry: tree cycle block %u points back to block %u", + blocks[level], block); + goto fail; + } + } + if (++level > indirect) return frame; + blocks[level] = block; frame++; - frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); + frame->bh = ext4_read_dirblock(dir, block, INDEX); if (IS_ERR(frame->bh)) { ret_err = (struct dx_frame *) frame->bh; frame->bh = NULL; goto fail; } + entries = ((struct dx_node *) frame->bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit(dir)) { @@ -1901,7 +1917,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct dx_hash_info *hinfo, ext4_lblk_t *newblock) { unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; + unsigned continued; + int count; struct buffer_head *bh2; u32 hash2; struct dx_map_entry *map; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6513079c728b..015028302305 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -52,6 +52,16 @@ int ext4_resize_begin(struct super_block *sb) if (!capable(CAP_SYS_RESOURCE)) return -EPERM; + /* + * If the reserved GDT blocks is non-zero, the resize_inode feature + * should always be set. + */ + if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks && + !ext4_has_feature_resize_inode(sb)) { + ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero"); + return -EFSCORRUPTED; + } + /* * If we are not using the primary superblock/GDT copy don't resize, * because the user tools have no way of handling this. Probably a diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 662c84406aaf..012d2eb6f2d3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1960,6 +1960,7 @@ static const struct mount_opts { MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET}, {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR}, + {Opt_commit, 0, MOPT_NO_EXT2}, {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, @@ -2083,6 +2084,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); int err; + if (!ext4_has_feature_encrypt(sb)) { + ext4_msg(sb, KERN_WARNING, + "test_dummy_encryption requires encrypt feature"); + return -1; + } + /* * This mount option is just for testing, and it's not worthwhile to * implement the extra complexity (e.g. RCU protection) that would be @@ -2110,11 +2117,13 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, return -1; } ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); + return 1; #else ext4_msg(sb, KERN_WARNING, - "Test dummy encryption mount option ignored"); + "test_dummy_encryption option not supported"); + return -1; + #endif - return 1; } static int handle_mount_opt(struct super_block *sb, char *opt, int token, @@ -4531,7 +4540,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_inodes_per_block; sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); sbi->s_sbh = bh; - sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY; sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); @@ -4922,12 +4931,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount_wq; } - if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && - !ext4_has_feature_encrypt(sb)) { - ext4_set_feature_encrypt(sb); - ext4_commit_super(sb, 1); - } - /* * Get the # of file system overhead blocks from the * superblock if present. @@ -5986,7 +5989,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (err) goto restore_opts; } - sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_mount_state = (le16_to_cpu(es->s_state) & + ~EXT4_FC_REPLAY); err = ext4_setup_super(sb, es, 0); if (err) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2d4323218cb0..6054f13e5c89 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -150,7 +150,7 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", blkaddr, exist); set_sbi_flag(sbi, SBI_NEED_FSCK); - WARN_ON(1); + dump_stack(); } return exist; } @@ -188,7 +188,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, f2fs_warn(sbi, "access invalid blkaddr:%u", blkaddr); set_sbi_flag(sbi, SBI_NEED_FSCK); - WARN_ON(1); + dump_stack(); return false; } else { return __is_bitmap_valid(sbi, blkaddr, type); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 42134a532de9..56b67a0d18d5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1071,8 +1071,8 @@ enum count_type { */ #define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type)) enum page_type { - DATA, - NODE, + DATA = 0, + NODE = 1, /* should not change this */ META, NR_PAGE_TYPE, META_FLUSH, @@ -2560,11 +2560,17 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, { spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi, !sbi->total_valid_block_count); - f2fs_bug_on(sbi, !sbi->total_valid_node_count); + if (unlikely(!sbi->total_valid_block_count || + !sbi->total_valid_node_count)) { + f2fs_warn(sbi, "dec_valid_node_count: inconsistent block counts, total_valid_block:%u, total_valid_node:%u", + sbi->total_valid_block_count, + sbi->total_valid_node_count); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } else { + sbi->total_valid_block_count--; + sbi->total_valid_node_count--; + } - sbi->total_valid_node_count--; - sbi->total_valid_block_count--; if (sbi->reserved_blocks && sbi->current_reserved_blocks < sbi->reserved_blocks) sbi->current_reserved_blocks++; @@ -4003,6 +4009,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab; * inline.c */ bool f2fs_may_inline_data(struct inode *inode); +bool f2fs_sanity_check_inline_data(struct inode *inode); bool f2fs_may_inline_dentry(struct inode *inode); void f2fs_do_read_inline_data(struct page *page, struct page *ipage); void f2fs_truncate_inline_inode(struct inode *inode, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0d4820afca16..478b65efbf31 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1415,11 +1415,19 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, ret = -ENOSPC; break; } - if (dn->data_blkaddr != NEW_ADDR) { - f2fs_invalidate_blocks(sbi, dn->data_blkaddr); - dn->data_blkaddr = NEW_ADDR; - f2fs_set_data_blkaddr(dn); + + if (dn->data_blkaddr == NEW_ADDR) + continue; + + if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, + DATA_GENERIC_ENHANCE)) { + ret = -EFSCORRUPTED; + break; } + + f2fs_invalidate_blocks(sbi, dn->data_blkaddr); + dn->data_blkaddr = NEW_ADDR; + f2fs_set_data_blkaddr(dn); } f2fs_update_extent_cache_range(dn, start, 0, index - start); @@ -1738,6 +1746,10 @@ static long f2fs_fallocate(struct file *file, int mode, inode_lock(inode); + ret = file_modified(file); + if (ret) + goto out; + if (mode & FALLOC_FL_PUNCH_HOLE) { if (offset >= inode->i_size) goto out; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index bb50338a38c9..470b069fa830 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -15,21 +15,40 @@ #include #include -bool f2fs_may_inline_data(struct inode *inode) +static bool support_inline_data(struct inode *inode) { if (f2fs_is_atomic_file(inode)) return false; - if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; - if (i_size_read(inode) > MAX_INLINE_DATA(inode)) return false; + return true; +} - if (f2fs_post_read_required(inode)) +bool f2fs_may_inline_data(struct inode *inode) +{ + if (!support_inline_data(inode)) return false; - return true; + return !f2fs_post_read_required(inode); +} + +bool f2fs_sanity_check_inline_data(struct inode *inode) +{ + if (!f2fs_has_inline_data(inode)) + return false; + + if (!support_inline_data(inode)) + return true; + + /* + * used by sanity_check_inode(), when disk layout fields has not + * been synchronized to inmem fields. + */ + return (S_ISREG(inode->i_mode) && + (file_is_encrypt(inode) || file_is_verity(inode) || + (F2FS_I(inode)->i_flags & F2FS_COMPR_FL))); } bool f2fs_may_inline_dentry(struct inode *inode) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 30278af1cdd7..d25853c98d4a 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -276,8 +276,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } } - if (f2fs_has_inline_data(inode) && - (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) { + if (f2fs_sanity_check_inline_data(inode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); @@ -781,8 +780,22 @@ void f2fs_evict_inode(struct inode *inode) f2fs_lock_op(sbi); err = f2fs_remove_inode_page(inode); f2fs_unlock_op(sbi); - if (err == -ENOENT) + if (err == -ENOENT) { err = 0; + + /* + * in fuzzed image, another node may has the same + * block address as inode's, if it was truncated + * previously, truncation of inode node will fail. + */ + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + f2fs_warn(F2FS_I_SB(inode), + "f2fs_evict_inode: inconsistent node id, ino:%lu", + inode->i_ino); + f2fs_inode_synced(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } + } } /* give more chances, if ENOMEM case */ diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 3048074618ff..fd4265c73b0d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -91,8 +91,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); - if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) - set_inode_flag(inode, FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); @@ -109,10 +107,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) f2fs_init_extent_tree(inode, NULL); - stat_inc_inline_xattr(inode); - stat_inc_inline_inode(inode); - stat_inc_inline_dir(inode); - F2FS_I(inode)->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); @@ -129,6 +123,14 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_compress_context(inode); } + /* Should enable inline_data after compression set */ + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) + set_inode_flag(inode, FI_INLINE_DATA); + + stat_inc_inline_xattr(inode); + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + f2fs_set_inode_flags(inode); trace_f2fs_new_inode(inode, 0); @@ -317,6 +319,9 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, if (!is_extension_exist(name, ext[i], false)) continue; + /* Do not use inline_data with compression */ + stat_dec_inline_inode(inode); + clear_inode_flag(inode, FI_INLINE_DATA); set_compress_context(inode); return; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8fe7ea4489d3..2d29eb15a550 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -354,16 +354,19 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct list_head *head = &fi->inmem_pages; struct inmem_pages *cur = NULL; + struct inmem_pages *tmp; f2fs_bug_on(sbi, !page_private_atomic(page)); mutex_lock(&fi->inmem_lock); - list_for_each_entry(cur, head, list) { - if (cur->page == page) + list_for_each_entry(tmp, head, list) { + if (tmp->page == page) { + cur = tmp; break; + } } - f2fs_bug_on(sbi, list_empty(head) || cur->page != page); + f2fs_bug_on(sbi, !cur); list_del(&cur->list); mutex_unlock(&fi->inmem_lock); @@ -4471,7 +4474,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) unsigned int i, start, end; unsigned int readed, start_blk = 0; int err = 0; - block_t total_node_blocks = 0; + block_t sit_valid_blocks[2] = {0, 0}; do { readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, @@ -4496,8 +4499,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (err) return err; seg_info_from_raw_sit(se, &sit); - if (IS_NODESEG(se->type)) - total_node_blocks += se->valid_blocks; + + sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; /* build discard map only one time */ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { @@ -4535,15 +4538,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sit = sit_in_journal(journal, i); old_valid_blocks = se->valid_blocks; - if (IS_NODESEG(se->type)) - total_node_blocks -= old_valid_blocks; + + sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks; err = check_block_count(sbi, start, &sit); if (err) break; seg_info_from_raw_sit(se, &sit); - if (IS_NODESEG(se->type)) - total_node_blocks += se->valid_blocks; + + sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); @@ -4563,13 +4566,24 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) } up_read(&curseg->journal_rwsem); - if (!err && total_node_blocks != valid_node_count(sbi)) { + if (err) + return err; + + if (sit_valid_blocks[NODE] != valid_node_count(sbi)) { f2fs_err(sbi, "SIT is corrupted node# %u vs %u", - total_node_blocks, valid_node_count(sbi)); - err = -EFSCORRUPTED; + sit_valid_blocks[NODE], valid_node_count(sbi)); + return -EFSCORRUPTED; } - return err; + if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] > + valid_user_blocks(sbi)) { + f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u", + sit_valid_blocks[DATA], sit_valid_blocks[NODE], + valid_user_blocks(sbi)); + return -EFSCORRUPTED; + } + + return 0; } static void init_free_segmap(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 10ddc2803ed0..d31eb2f2019c 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -24,6 +24,7 @@ #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA) #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE) +#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA)) static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, unsigned short seg_type) @@ -571,11 +572,10 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi)); } -static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi) +static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, + unsigned int node_blocks, unsigned int dent_blocks) { - unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) + - get_pages(sbi, F2FS_DIRTY_DENTS); - unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int segno, left_blocks; int i; @@ -601,19 +601,28 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi) static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed, int needed) { - int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); - int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) + + get_pages(sbi, F2FS_DIRTY_DENTS) + + get_pages(sbi, F2FS_DIRTY_IMETA); + unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi); + unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi); + unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi); + unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi); + unsigned int free, need_lower, need_upper; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; - if (free_sections(sbi) + freed == reserved_sections(sbi) + needed && - has_curseg_enough_space(sbi)) + free = free_sections(sbi) + freed; + need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed; + need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0); + + if (free > need_upper) return false; - return (free_sections(sbi) + freed) <= - (node_secs + 2 * dent_secs + imeta_secs + - reserved_sections(sbi) + needed); + else if (free <= need_lower) + return true; + return !has_curseg_enough_space(sbi, node_blocks, dent_blocks); } static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 98f585000aa2..cb049996e53b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2548,7 +2548,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) if (!sb_has_quota_active(sb, cnt)) continue; - inode_lock(dqopt->files[cnt]); + if (!f2fs_sb_has_quota_ino(sbi)) + inode_lock(dqopt->files[cnt]); /* * do_quotactl @@ -2567,7 +2568,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) f2fs_up_read(&sbi->quota_sem); f2fs_unlock_op(sbi); - inode_unlock(dqopt->files[cnt]); + if (!f2fs_sb_has_quota_ino(sbi)) + inode_unlock(dqopt->files[cnt]); if (ret) break; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index f7e3304b7802..353735032947 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -93,7 +93,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, err_brelse: brelse(bhs[0]); err: - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", + (llu)blocknr); return -EIO; } @@ -106,8 +107,8 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { - fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", - (llu)blocknr); + fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", + (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d51354221fe7..87bf2ffc7c7f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1650,11 +1650,12 @@ static long writeback_sb_inodes(struct super_block *sb, }; unsigned long start_time = jiffies; long write_chunk; - long wrote = 0; /* count both pages and inodes */ + long total_wrote = 0; /* count both pages and inodes */ while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); struct bdi_writeback *tmp_wb; + long wrote; if (inode->i_sb != sb) { if (work->sb) { @@ -1730,7 +1731,9 @@ static long writeback_sb_inodes(struct super_block *sb, wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; - wrote += write_chunk - wbc.nr_to_write; + wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped; + wrote = wrote < 0 ? 0 : wrote; + total_wrote += wrote; if (need_resched()) { /* @@ -1752,7 +1755,7 @@ static long writeback_sb_inodes(struct super_block *sb, tmp_wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) - wrote++; + total_wrote++; requeue_inode(inode, tmp_wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); @@ -1766,14 +1769,14 @@ static long writeback_sb_inodes(struct super_block *sb, * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. */ - if (wrote) { + if (total_wrote) { if (time_is_before_jiffies(start_time + HZ / 10UL)) break; if (work->nr_pages <= 0) break; } } - return wrote; + return total_wrote; } static long __writeback_inodes_wb(struct bdi_writeback *wb, diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 2e6f622ed428..55a8eb3c1963 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -858,14 +858,16 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; iocb->ki_flags &= ~IOCB_DIRECT; } + pagefault_disable(); iocb->ki_flags |= IOCB_NOIO; ret = generic_file_read_iter(iocb, to); iocb->ki_flags &= ~IOCB_NOIO; + pagefault_enable(); if (ret >= 0) { if (!iov_iter_count(to)) return ret; written = ret; - } else { + } else if (ret != -EFAULT) { if (ret != -EAGAIN) return ret; if (iocb->ki_flags & IOCB_NOWAIT) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 6e173ae378c4..ad953ecb5853 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -531,34 +531,42 @@ static void qdsb_put(struct gfs2_quota_data *qd) */ int gfs2_qa_get(struct gfs2_inode *ip) { - int error = 0; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct inode *inode = &ip->i_inode; if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - down_write(&ip->i_rw_mutex); + spin_lock(&inode->i_lock); if (ip->i_qadata == NULL) { - ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS); - if (!ip->i_qadata) { - error = -ENOMEM; - goto out; - } + struct gfs2_qadata *tmp; + + spin_unlock(&inode->i_lock); + tmp = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS); + if (!tmp) + return -ENOMEM; + + spin_lock(&inode->i_lock); + if (ip->i_qadata == NULL) + ip->i_qadata = tmp; + else + kmem_cache_free(gfs2_qadata_cachep, tmp); } ip->i_qadata->qa_ref++; -out: - up_write(&ip->i_rw_mutex); - return error; + spin_unlock(&inode->i_lock); + return 0; } void gfs2_qa_put(struct gfs2_inode *ip) { - down_write(&ip->i_rw_mutex); + struct inode *inode = &ip->i_inode; + + spin_lock(&inode->i_lock); if (ip->i_qadata && --ip->i_qadata->qa_ref == 0) { kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata); ip->i_qadata = NULL; } - up_write(&ip->i_rw_mutex); + spin_unlock(&inode->i_lock); } int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) diff --git a/fs/io_uring.c b/fs/io_uring.c index b08d1089731c..a952288b2ab8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -696,6 +696,8 @@ struct io_kiocb { */ struct list_head inflight_entry; + struct list_head iopoll_entry; + struct percpu_ref *fixed_file_refs; struct callback_head task_work; /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ @@ -773,7 +775,8 @@ static const struct io_op_def io_op_defs[] = { .buffer_select = 1, .needs_async_data = 1, .async_size = sizeof(struct io_async_rw), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FILES, }, [IORING_OP_WRITEV] = { .needs_file = 1, @@ -783,7 +786,7 @@ static const struct io_op_def io_op_defs[] = { .needs_async_data = 1, .async_size = sizeof(struct io_async_rw), .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FSIZE, + IO_WQ_WORK_FSIZE | IO_WQ_WORK_FILES, }, [IORING_OP_FSYNC] = { .needs_file = 1, @@ -794,7 +797,8 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollin = 1, .async_size = sizeof(struct io_async_rw), - .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM, + .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM | + IO_WQ_WORK_FILES, }, [IORING_OP_WRITE_FIXED] = { .needs_file = 1, @@ -803,7 +807,7 @@ static const struct io_op_def io_op_defs[] = { .pollout = 1, .async_size = sizeof(struct io_async_rw), .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE | - IO_WQ_WORK_MM, + IO_WQ_WORK_MM | IO_WQ_WORK_FILES, }, [IORING_OP_POLL_ADD] = { .needs_file = 1, @@ -857,7 +861,7 @@ static const struct io_op_def io_op_defs[] = { .pollout = 1, .needs_async_data = 1, .async_size = sizeof(struct io_async_connect), - .work_flags = IO_WQ_WORK_MM, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FS, }, [IORING_OP_FALLOCATE] = { .needs_file = 1, @@ -885,7 +889,8 @@ static const struct io_op_def io_op_defs[] = { .pollin = 1, .buffer_select = 1, .async_size = sizeof(struct io_async_rw), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FILES, }, [IORING_OP_WRITE] = { .needs_file = 1, @@ -894,7 +899,7 @@ static const struct io_op_def io_op_defs[] = { .pollout = 1, .async_size = sizeof(struct io_async_rw), .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FSIZE, + IO_WQ_WORK_FSIZE | IO_WQ_WORK_FILES, }, [IORING_OP_FADVISE] = { .needs_file = 1, @@ -907,14 +912,16 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FS, }, [IORING_OP_RECV] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | + IO_WQ_WORK_FS, }, [IORING_OP_OPENAT2] = { .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_FS | @@ -2345,8 +2352,8 @@ static void io_iopoll_queue(struct list_head *again) struct io_kiocb *req; do { - req = list_first_entry(again, struct io_kiocb, inflight_entry); - list_del(&req->inflight_entry); + req = list_first_entry(again, struct io_kiocb, iopoll_entry); + list_del(&req->iopoll_entry); __io_complete_rw(req, -EAGAIN, 0, NULL); } while (!list_empty(again)); } @@ -2368,14 +2375,14 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, while (!list_empty(done)) { int cflags = 0; - req = list_first_entry(done, struct io_kiocb, inflight_entry); + req = list_first_entry(done, struct io_kiocb, iopoll_entry); if (READ_ONCE(req->result) == -EAGAIN) { req->result = 0; req->iopoll_completed = 0; - list_move_tail(&req->inflight_entry, &again); + list_move_tail(&req->iopoll_entry, &again); continue; } - list_del(&req->inflight_entry); + list_del(&req->iopoll_entry); if (req->flags & REQ_F_BUFFER_SELECTED) cflags = io_put_rw_kbuf(req); @@ -2411,7 +2418,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, spin = !ctx->poll_multi_file && *nr_events < min; ret = 0; - list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { + list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, iopoll_entry) { struct kiocb *kiocb = &req->rw.kiocb; /* @@ -2420,7 +2427,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, * and complete those lists first, if we have entries there. */ if (READ_ONCE(req->iopoll_completed)) { - list_move_tail(&req->inflight_entry, &done); + list_move_tail(&req->iopoll_entry, &done); continue; } if (!list_empty(&done)) @@ -2432,7 +2439,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, /* iopoll may have completed current req */ if (READ_ONCE(req->iopoll_completed)) - list_move_tail(&req->inflight_entry, &done); + list_move_tail(&req->iopoll_entry, &done); if (ret && spin) spin = false; @@ -2579,45 +2586,6 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res, #ifdef CONFIG_BLOCK static bool io_resubmit_prep(struct io_kiocb *req, int error) { - struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; - ssize_t ret = -ECANCELED; - struct iov_iter iter; - int rw; - - if (error) { - ret = error; - goto end_req; - } - - switch (req->opcode) { - case IORING_OP_READV: - case IORING_OP_READ_FIXED: - case IORING_OP_READ: - rw = READ; - break; - case IORING_OP_WRITEV: - case IORING_OP_WRITE_FIXED: - case IORING_OP_WRITE: - rw = WRITE; - break; - default: - printk_once(KERN_WARNING "io_uring: bad opcode in resubmit %d\n", - req->opcode); - goto end_req; - } - - if (!req->async_data) { - ret = io_import_iovec(rw, req, &iovec, &iter, false); - if (ret < 0) - goto end_req; - ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false); - if (!ret) - return true; - kfree(iovec); - } else { - return true; - } -end_req: req_set_fail_links(req); return false; } @@ -2704,7 +2672,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req) struct io_kiocb *list_req; list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, - inflight_entry); + iopoll_entry); if (list_req->file != req->file) ctx->poll_multi_file = true; } @@ -2714,9 +2682,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req) * it to the front so we find it first. */ if (READ_ONCE(req->iopoll_completed)) - list_add(&req->inflight_entry, &ctx->iopoll_list); + list_add(&req->iopoll_entry, &ctx->iopoll_list); else - list_add_tail(&req->inflight_entry, &ctx->iopoll_list); + list_add_tail(&req->iopoll_entry, &ctx->iopoll_list); if ((ctx->flags & IORING_SETUP_SQPOLL) && wq_has_sleeper(&ctx->sq_data->wait)) @@ -3428,6 +3396,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; + struct iov_iter iter_cp; struct io_async_rw *rw = req->async_data; ssize_t io_size, ret, ret2; bool no_async; @@ -3438,6 +3407,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; + iter_cp = *iter; io_size = iov_iter_count(iter); req->result = io_size; ret = 0; @@ -3473,7 +3443,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, if (req->file->f_flags & O_NONBLOCK) goto done; /* some cases will consume bytes even on error returns */ - iov_iter_revert(iter, io_size - iov_iter_count(iter)); + *iter = iter_cp; ret = 0; goto copy_iov; } else if (ret < 0) { @@ -3556,6 +3526,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; + struct iov_iter iter_cp; struct io_async_rw *rw = req->async_data; ssize_t ret, ret2, io_size; @@ -3565,6 +3536,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; + iter_cp = *iter; io_size = iov_iter_count(iter); req->result = io_size; @@ -3626,7 +3598,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, } else { copy_iov: /* some cases will consume bytes even on error returns */ - iov_iter_revert(iter, io_size - iov_iter_count(iter)); + *iter = iter_cp; ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); if (!ret) return -EAGAIN; @@ -4394,6 +4366,8 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; + if (unlikely(sqe->addr2 || sqe->splice_fd_in || sqe->ioprio)) + return -EINVAL; sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); @@ -4629,6 +4603,8 @@ static int io_recvmsg_prep(struct io_kiocb *req, if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; + if (unlikely(sqe->addr2 || sqe->splice_fd_in || sqe->ioprio)) + return -EINVAL; sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); diff --git a/fs/ioctl.c b/fs/ioctl.c index 32d8bd3f958b..ac3b386d99d5 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -170,7 +170,7 @@ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, if (*len == 0) return -EINVAL; - if (start > maxbytes) + if (start >= maxbytes) return -EFBIG; /* diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index caed9d98c64a..d88dc9da040e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -528,7 +528,8 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) * write started inside the existing inode size. */ if (pos + len > i_size) - truncate_pagecache_range(inode, max(pos, i_size), pos + len); + truncate_pagecache_range(inode, max(pos, i_size), + pos + len - 1); } static int @@ -1458,13 +1459,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) PF_MEMALLOC)) goto redirty; - /* - * Given that we do not allow direct reclaim to call us, we should - * never be called in a recursive filesystem reclaim context. - */ - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS)) - goto redirty; - /* * Is this page beyond the end of the file? * diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 7170de78cd26..db210989784d 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -603,6 +603,7 @@ int jffs2_do_fill_super(struct super_block *sb, struct fs_context *fc) jffs2_free_raw_node_refs(c); kvfree(c->blocks); jffs2_clear_xattr_subsystem(c); + jffs2_sum_exit(c); out_inohash: kfree(c->inocache_list); out_wbuf: diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index e58ae29a223d..0ce17ea8fa8a 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -385,7 +385,8 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) } /* write the last buffer. */ - write_metapage(mp); + if (mp) + write_metapage(mp); IREAD_UNLOCK(ipbmap); diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 9aec80b9d7c6..afb39e1bbe3b 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -19,7 +19,15 @@ DEFINE_MUTEX(kernfs_mutex); static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ -static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ +/* + * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to + * call pr_cont() while holding rename_lock. Because sometimes pr_cont() + * will perform wakeups when releasing console_sem. Holding rename_lock + * will introduce deadlock if the scheduler reads the kernfs_name in the + * wakeup path. + */ +static DEFINE_SPINLOCK(kernfs_pr_cont_lock); +static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) @@ -230,12 +238,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn) { unsigned long flags; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); + kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); pr_cont("%s", kernfs_pr_cont_buf); - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -249,10 +257,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) unsigned long flags; int sz; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf, - sizeof(kernfs_pr_cont_buf)); + sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, + sizeof(kernfs_pr_cont_buf)); if (sz < 0) { pr_cont("(error)"); goto out; @@ -266,7 +274,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) pr_cont("%s", kernfs_pr_cont_buf); out: - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -864,13 +872,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, lockdep_assert_held(&kernfs_mutex); - /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */ - spin_lock_irq(&kernfs_rename_lock); + spin_lock_irq(&kernfs_pr_cont_lock); len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); if (len >= sizeof(kernfs_pr_cont_buf)) { - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return NULL; } @@ -882,7 +889,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, parent = kernfs_find_ns(parent, name, ns); } - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return parent; } diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index a5209643ac36..bfdd21224073 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -283,6 +283,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, rv = NFS4_OK; break; case -ENOENT: + set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); /* Embrace your forgetfulness! */ rv = NFS4ERR_NOMATCHING_LAYOUT; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7b47f9b063f1..ad856b7b9a46 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -208,15 +208,16 @@ static int nfs_file_fsync_commit(struct file *file, int datasync) { struct inode *inode = file_inode(file); - int ret; + int ret, ret2; dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); ret = nfs_commit_inode(inode, FLUSH_SYNC); - if (ret < 0) - return ret; - return file_check_and_advance_wb_err(file); + ret2 = file_check_and_advance_wb_err(file); + if (ret2 < 0) + return ret2; + return ret; } int @@ -389,11 +390,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, return status; NFS_I(mapping->host)->write_io += copied; - if (nfs_ctx_key_to_expire(ctx, mapping->host)) { - status = nfs_wb_all(mapping->host); - if (status < 0) - return status; - } + if (nfs_ctx_key_to_expire(ctx, mapping->host)) + nfs_wb_all(mapping->host); return copied; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7009a8dddd45..a7e0970b5bfe 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -832,6 +832,7 @@ static inline bool nfs_error_is_fatal_on_server(int err) case 0: case -ERESTARTSYS: case -EINTR: + case -ENOMEM: return false; } return nfs_error_is_fatal(err); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3387618a4984..3f2fce583170 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3086,6 +3086,10 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, } out: + if (opendata->lgp) { + nfs4_lgopen_release(opendata->lgp); + opendata->lgp = NULL; + } if (!opendata->cancelled) nfs4_sequence_free_slot(&opendata->o_res.seq_res); return ret; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b3b9eff5d572..21436721745b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -469,6 +469,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, pnfs_clear_lseg_state(lseg, lseg_list); pnfs_clear_layoutreturn_info(lo); pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); + set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) pnfs_clear_layoutreturn_waitbit(lo); @@ -1923,8 +1924,9 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) { - if (atomic_dec_and_test(&lo->plh_outstanding)) - wake_up_var(&lo->plh_outstanding); + if (atomic_dec_and_test(&lo->plh_outstanding) && + test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); } static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) @@ -2006,6 +2008,7 @@ pnfs_update_layout(struct inode *ino, lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { spin_unlock(&ino->i_lock); + lseg = ERR_PTR(-ENOMEM); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_NOMEM); goto out; @@ -2030,11 +2033,11 @@ pnfs_update_layout(struct inode *ino, * If the layout segment list is empty, but there are outstanding * layoutget calls, then they might be subject to a layoutrecall. */ - if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) && + if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && atomic_read(&lo->plh_outstanding) != 0) { spin_unlock(&ino->i_lock); - lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, - !atomic_read(&lo->plh_outstanding))); + lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN, + TASK_KILLABLE)); if (IS_ERR(lseg)) goto out_put_layout_hdr; pnfs_put_layout_hdr(lo); @@ -2134,6 +2137,7 @@ pnfs_update_layout(struct inode *ino, lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); if (!lgp) { + lseg = ERR_PTR(-ENOMEM); trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, PNFS_UPDATE_LAYOUT_NOMEM); nfs_layoutget_end(lo); @@ -2153,6 +2157,12 @@ pnfs_update_layout(struct inode *ino, case -ERECALLCONFLICT: case -EAGAIN: break; + case -ENODATA: + /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */ + pnfs_layout_set_fail_bit( + lo, pnfs_iomode_to_fail_bit(iomode)); + lseg = NULL; + goto out_put_layout_hdr; default: if (!nfs_error_is_fatal(PTR_ERR(lseg))) { pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); @@ -2406,7 +2416,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget; } - if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo)) + if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && + !pnfs_is_first_layoutget(lo)) goto out_forget; if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 11d9ed9addc0..a7cf84a6673b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -107,6 +107,7 @@ enum { NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */ NFS_LAYOUT_HASHED, /* The layout visible */ + NFS_LAYOUT_DRAIN, }; enum layoutdriver_policy_flags { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5d07799513a6..dc08a0c02f09 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -675,11 +675,7 @@ static int nfs_writepage_locked(struct page *page, err = nfs_do_writepage(page, wbc, &pgio); pgio.pg_error = 0; nfs_pageio_complete(&pgio); - if (err < 0) - return err; - if (nfs_error_is_fatal(pgio.pg_error)) - return pgio.pg_error; - return 0; + return err; } int nfs_writepage(struct page *page, struct writeback_control *wbc) @@ -730,9 +726,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) if (err < 0) goto out_err; - err = pgio.pg_error; - if (nfs_error_is_fatal(err)) - goto out_err; return 0; out_err: return err; @@ -1411,7 +1404,7 @@ static void nfs_async_write_error(struct list_head *head, int error) while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - if (nfs_error_is_fatal(error)) + if (nfs_error_is_fatal_on_server(error)) nfs_write_error(req, error); else nfs_redirty_request(req); diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index acd0898e3866..e30e1ddc1ace 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -194,7 +194,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); } nf->nf_mark = NULL; - init_rwsem(&nf->nf_rwsem); trace_nfsd_file_alloc(nf); } return nf; diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 7872df5a0fe3..435ceab27897 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -46,7 +46,6 @@ struct nfsd_file { refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; - struct rw_semaphore nf_rwsem; }; int nfsd_file_cache_init(void); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7850d141c762..735ee8a79870 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1380,6 +1380,8 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) { + struct file *dst = copy->nf_dst->nf_file; + struct file *src = copy->nf_src->nf_file; ssize_t bytes_copied = 0; size_t bytes_total = copy->cp_count; u64 src_pos = copy->cp_src_pos; @@ -1388,9 +1390,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) do { if (kthread_should_stop()) break; - bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file, - src_pos, copy->nf_dst->nf_file, dst_pos, - bytes_total); + bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos, + bytes_total); if (bytes_copied <= 0) break; bytes_total -= bytes_copied; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 84dd68091f42..f1b503bec222 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7122,16 +7122,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, if (sop->so_is_open_owner || !same_owner_str(sop, owner)) continue; - /* see if there are still any locks associated with it */ - lo = lockowner(sop); - list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_stid.sc_file, lo)) { - status = nfserr_locks_held; - spin_unlock(&clp->cl_lock); - return status; - } + if (atomic_read(&sop->so_count) != 1) { + spin_unlock(&clp->cl_lock); + return nfserr_locks_held; } + lo = lockowner(sop); nfs4_get_stateowner(sop); break; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 011cd570b50d..c852bb5ff212 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -535,10 +535,11 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, { struct file *src = nf_src->nf_file; struct file *dst = nf_dst->nf_file; + errseq_t since; loff_t cloned; __be32 ret = 0; - down_write(&nf_dst->nf_rwsem); + since = READ_ONCE(dst->f_wb_err); cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); if (cloned < 0) { ret = nfserrno(cloned); @@ -552,6 +553,8 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); + if (!status) + status = filemap_check_wb_err(dst->f_mapping, since); if (!status) status = commit_inode_metadata(file_inode(src)); if (status < 0) { @@ -561,7 +564,6 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, } } out_err: - up_write(&nf_dst->nf_rwsem); return ret; } @@ -980,6 +982,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, struct file *file = nf->nf_file; struct svc_export *exp; struct iov_iter iter; + errseq_t since; __be32 nfserr; int host_err; int use_wgather; @@ -1009,21 +1012,18 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, flags |= RWF_SYNC; iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); + since = READ_ONCE(file->f_wb_err); if (flags & RWF_SYNC) { - down_write(&nf->nf_rwsem); host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), nfsd_net_id)); - up_write(&nf->nf_rwsem); } else { - down_read(&nf->nf_rwsem); if (verf) nfsd_copy_boot_verifier(verf, net_generic(SVC_NET(rqstp), nfsd_net_id)); host_err = vfs_iter_write(file, &iter, &pos, flags); - up_read(&nf->nf_rwsem); } if (host_err < 0) { nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), @@ -1033,6 +1033,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, *cnt = host_err; nfsdstats.io_write += *cnt; fsnotify_modify(file); + host_err = filemap_check_wb_err(file->f_mapping, since); + if (host_err < 0) + goto out_nfserr; if (stable && use_wgather) { host_err = wait_for_concurrent_writes(file); @@ -1113,19 +1116,6 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, } #ifdef CONFIG_NFSD_V3 -static int -nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset, - loff_t end) -{ - struct address_space *mapping = nf->nf_file->f_mapping; - int ret = filemap_fdatawrite_range(mapping, offset, end); - - if (ret) - return ret; - filemap_fdatawait_range_keep_errors(mapping, offset, end); - return 0; -} - /* * Commit all pending writes to stable storage. * @@ -1156,25 +1146,26 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out; if (EX_ISSYNC(fhp->fh_export)) { - int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end); + errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); + int err2; - down_write(&nf->nf_rwsem); - if (!err2) - err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); + err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); switch (err2) { case 0: nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, nfsd_net_id)); + err2 = filemap_check_wb_err(nf->nf_file->f_mapping, + since); + err = nfserrno(err2); break; case -EINVAL: err = nfserr_notsupp; break; default: - err = nfserrno(err2); nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); + err = nfserrno(err2); } - up_write(&nf->nf_rwsem); } else nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, nfsd_net_id)); diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 4391fd3abd8f..e00e184b1261 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -20,6 +20,23 @@ #include "page.h" #include "btnode.h" + +/** + * nilfs_init_btnc_inode - initialize B-tree node cache inode + * @btnc_inode: inode to be initialized + * + * nilfs_init_btnc_inode() sets up an inode for B-tree node cache. + */ +void nilfs_init_btnc_inode(struct inode *btnc_inode) +{ + struct nilfs_inode_info *ii = NILFS_I(btnc_inode); + + btnc_inode->i_mode = S_IFREG; + ii->i_flags = 0; + memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); +} + void nilfs_btnode_cache_clear(struct address_space *btnc) { invalidate_mapping_pages(btnc, 0, -1); @@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) struct buffer_head * nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) { - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct buffer_head *bh; bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); @@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct page *page; int err; @@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh, *nbh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; int err; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 0f88dbc9bcb3..05ab64d354dc 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt { struct buffer_head *newbh; }; +void nilfs_init_btnc_inode(struct inode *btnc_inode); void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index f42ab57201e7..77efd69213a3 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); @@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp, const struct nilfs_btree_readahead_info *ra) { - struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh, *ra_bh; sector_t submit_ptr = 0; int ret; @@ -1742,6 +1744,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, dat = nilfs_bmap_get_dat(btree); } + ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); + if (ret < 0) + return ret; + ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); if (ret < 0) return ret; @@ -1914,7 +1920,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; path[level].bp_ctxt.bh = path[level].bp_bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) { nilfs_dat_abort_update(dat, @@ -1940,7 +1946,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); path[level].bp_bh = path[level].bp_ctxt.bh; } @@ -1959,7 +1965,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, &path[level].bp_newreq.bpr_req); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); } @@ -2135,7 +2141,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { - struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; + struct address_space *btcache = btnc_inode->i_mapping; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; struct pagevec pvec; struct buffer_head *bh, *head; @@ -2189,12 +2196,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, path[level].bp_ctxt.newkey = blocknr; path[level].bp_ctxt.bh = *bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) return ret; nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(btree)->i_btnode_cache, + NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); *bh = path[level].bp_ctxt.bh; } @@ -2399,6 +2406,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap) if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) ret = -EIO; + else + ret = nilfs_attach_btree_node_cache( + &NILFS_BMAP_I(bmap)->vfs_inode); + return ret; } diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 8bccdf1158fc..1a3d183027b9 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -497,7 +497,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, di = NILFS_DAT_I(dat); lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); nilfs_palloc_setup_cache(dat, &di->palloc_cache); - nilfs_mdt_setup_shadow_map(dat, &di->shadow); + err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); + if (err) + goto failed; err = nilfs_read_inode_common(dat, raw_inode); if (err) diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 448320496856..aadea660c66c 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) { + struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode; int ret; - ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, + ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, vbn ? : pbn, pbn, REQ_OP_READ, 0, out_bh, &pbn); if (ret == -EEXIST) /* internal code (cache hit) */ @@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode) ii->i_flags = 0; nilfs_bmap_init_gc(ii->i_bmap); - return 0; + return nilfs_attach_btree_node_cache(inode); } /** @@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 745d371d6fea..95684fa3c985 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -29,12 +29,16 @@ * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) * @for_gc: inode for GC flag + * @for_btnc: inode for B-tree node cache flag + * @for_shadow: inode for shadowed page cache flag */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - int for_gc; + bool for_gc; + bool for_btnc; + bool for_shadow; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -314,7 +318,8 @@ static int nilfs_insert_inode_locked(struct inode *inode, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = false }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -527,6 +532,19 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); + if (test_bit(NILFS_I_BTNC, &ii->i_state)) { + if (!args->for_btnc) + return 0; + } else if (args->for_btnc) { + return 0; + } + if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { + if (!args->for_shadow) + return 0; + } else if (args->for_shadow) { + return 0; + } + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) return !args->for_gc; @@ -538,15 +556,17 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) struct nilfs_iget_args *args = opaque; inode->i_ino = args->ino; - if (args->for_gc) { + NILFS_I(inode)->i_cno = args->cno; + NILFS_I(inode)->i_root = args->root; + if (args->root && args->ino == NILFS_ROOT_INO) + nilfs_get_root(args->root); + + if (args->for_gc) NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - NILFS_I(inode)->i_cno = args->cno; - NILFS_I(inode)->i_root = NULL; - } else { - if (args->root && args->ino == NILFS_ROOT_INO) - nilfs_get_root(args->root); - NILFS_I(inode)->i_root = args->root; - } + if (args->for_btnc) + NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); + if (args->for_shadow) + NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -554,7 +574,8 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = false }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -564,7 +585,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = 0 + .ino = ino, .root = root, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = false }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -595,7 +617,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 + .ino = ino, .root = NULL, .cno = cno, .for_gc = true, + .for_btnc = false, .for_shadow = false }; struct inode *inode; int err; @@ -615,6 +638,113 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, return inode; } +/** + * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode + * @inode: inode object + * + * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, + * or does nothing if the inode already has it. This function allocates + * an additional inode to maintain page cache of B-tree nodes one-on-one. + * + * Return Value: On success, 0 is returned. On errors, one of the following + * negative error code is returned. + * + * %-ENOMEM - Insufficient memory available. + */ +int nilfs_attach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode; + struct nilfs_iget_args args; + + if (ii->i_assoc_inode) + return 0; + + args.ino = inode->i_ino; + args.root = ii->i_root; + args.cno = ii->i_cno; + args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; + args.for_btnc = true; + args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; + + btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!btnc_inode)) + return -ENOMEM; + if (btnc_inode->i_state & I_NEW) { + nilfs_init_btnc_inode(btnc_inode); + unlock_new_inode(btnc_inode); + } + NILFS_I(btnc_inode)->i_assoc_inode = inode; + NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; + ii->i_assoc_inode = btnc_inode; + + return 0; +} + +/** + * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode + * @inode: inode object + * + * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its + * holder inode bound to @inode, or does nothing if @inode doesn't have it. + */ +void nilfs_detach_btree_node_cache(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct inode *btnc_inode = ii->i_assoc_inode; + + if (btnc_inode) { + NILFS_I(btnc_inode)->i_assoc_inode = NULL; + ii->i_assoc_inode = NULL; + iput(btnc_inode); + } +} + +/** + * nilfs_iget_for_shadow - obtain inode for shadow mapping + * @inode: inode object that uses shadow mapping + * + * nilfs_iget_for_shadow() allocates a pair of inodes that holds page + * caches for shadow mapping. The page cache for data pages is set up + * in one inode and the one for b-tree node pages is set up in the + * other inode, which is attached to the former inode. + * + * Return Value: On success, a pointer to the inode for data pages is + * returned. On errors, one of the following negative error code is returned + * in a pointer type. + * + * %-ENOMEM - Insufficient memory available. + */ +struct inode *nilfs_iget_for_shadow(struct inode *inode) +{ + struct nilfs_iget_args args = { + .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, + .for_btnc = false, .for_shadow = true + }; + struct inode *s_inode; + int err; + + s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, + nilfs_iget_set, &args); + if (unlikely(!s_inode)) + return ERR_PTR(-ENOMEM); + if (!(s_inode->i_state & I_NEW)) + return inode; + + NILFS_I(s_inode)->i_flags = 0; + memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); + + err = nilfs_attach_btree_node_cache(s_inode); + if (unlikely(err)) { + iget_failed(s_inode); + return ERR_PTR(err); + } + unlock_new_inode(s_inode); + return s_inode; +} + void nilfs_write_inode_common(struct inode *inode, struct nilfs_inode *raw_inode, int has_bmap) { @@ -762,7 +892,8 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) nilfs_put_root(ii->i_root); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index c0361ce45f62..e80ef2c0a785 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -469,9 +469,18 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) void nilfs_mdt_clear(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mdi->mi_shadow; if (mdi->mi_palloc_cache) nilfs_palloc_destroy_cache(inode); + + if (shadow) { + struct inode *s_inode = shadow->inode; + + shadow->inode = NULL; + iput(s_inode); + mdi->mi_shadow = NULL; + } } /** @@ -505,12 +514,15 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, struct nilfs_shadow_map *shadow) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct inode *s_inode; INIT_LIST_HEAD(&shadow->frozen_buffers); - address_space_init_once(&shadow->frozen_data); - nilfs_mapping_init(&shadow->frozen_data, inode); - address_space_init_once(&shadow->frozen_btnodes); - nilfs_mapping_init(&shadow->frozen_btnodes, inode); + + s_inode = nilfs_iget_for_shadow(inode); + if (IS_ERR(s_inode)) + return PTR_ERR(s_inode); + + shadow->inode = s_inode; mi->mi_shadow = shadow; return 0; } @@ -524,14 +536,15 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *s_inode = shadow->inode; int ret; - ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); + ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); if (ret) goto out; - ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, - &ii->i_btnode_cache); + ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, + ii->i_assoc_inode->i_mapping); if (ret) goto out; @@ -547,7 +560,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) struct page *page; int blkbits = inode->i_blkbits; - page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); + page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index); if (!page) return -ENOMEM; @@ -579,7 +592,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) struct page *page; int n; - page = find_lock_page(&shadow->frozen_data, bh->b_page->index); + page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index); if (page) { if (page_has_buffers(page)) { n = bh_offset(bh) >> inode->i_blkbits; @@ -620,10 +633,11 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) nilfs_palloc_clear_cache(inode); nilfs_clear_dirty_pages(inode->i_mapping, true); - nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); + nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); - nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, + NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); @@ -638,10 +652,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode) { struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_shadow_map *shadow = mi->mi_shadow; + struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; down_write(&mi->mi_sem); nilfs_release_frozen_buffers(shadow); - truncate_inode_pages(&shadow->frozen_data, 0); - truncate_inode_pages(&shadow->frozen_btnodes, 0); + truncate_inode_pages(shadow->inode->i_mapping, 0); + truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); up_write(&mi->mi_sem); } diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index e77aea4bb921..9d8ac0d27c16 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -18,14 +18,12 @@ /** * struct nilfs_shadow_map - shadow mapping of meta data file * @bmap_store: shadow copy of bmap state - * @frozen_data: shadowed dirty data pages - * @frozen_btnodes: shadowed dirty b-tree nodes' pages + * @inode: holder of page caches used in shadow mapping * @frozen_buffers: list of frozen buffers */ struct nilfs_shadow_map { struct nilfs_bmap_store bmap_store; - struct address_space frozen_data; - struct address_space frozen_btnodes; + struct inode *inode; struct list_head frozen_buffers; }; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index f8450ee3fd06..ace27a89fbb0 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -28,7 +28,7 @@ * @i_xattr: * @i_dir_start_lookup: page index of last successful search * @i_cno: checkpoint number for GC inode - * @i_btnode_cache: cached pages of b-tree nodes + * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer) * @i_dirty: list for connecting dirty files * @xattr_sem: semaphore for extended attributes processing * @i_bh: buffer contains disk inode @@ -43,7 +43,7 @@ struct nilfs_inode_info { __u64 i_xattr; /* sector_t ??? */ __u32 i_dir_start_lookup; __u64 i_cno; /* check point number for GC inode */ - struct address_space i_btnode_cache; + struct inode *i_assoc_inode; struct list_head i_dirty; /* List for connecting dirty files */ #ifdef CONFIG_NILFS_XATTR @@ -75,13 +75,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap) return container_of(bmap, struct nilfs_inode_info, i_bmap_data); } -static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) -{ - struct nilfs_inode_info *ii = - container_of(btnc, struct nilfs_inode_info, i_btnode_cache); - return &ii->vfs_inode; -} - /* * Dynamic state flags of NILFS on-memory inode (i_state) */ @@ -98,6 +91,8 @@ enum { NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ NILFS_I_GCINODE, /* inode for GC, on memory only */ + NILFS_I_BTNC, /* inode for btree node cache */ + NILFS_I_SHADOW, /* inode for shadowed page cache */ }; /* @@ -203,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode) static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) { + if (S_ISLNK(inode->i_mode)) + return 0; + inode->i_mode &= ~current_umask(); return 0; } @@ -264,6 +262,9 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, unsigned long ino); extern struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno); +int nilfs_attach_btree_node_cache(struct inode *inode); +void nilfs_detach_btree_node_cache(struct inode *inode); +struct inode *nilfs_iget_for_shadow(struct inode *inode); extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 171fb5cd427f..d1a148f0cae3 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -448,10 +448,9 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode) /* * NILFS2 needs clear_page_dirty() in the following two cases: * - * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears - * page dirty flags when it copies back pages from the shadow cache - * (gcdat->{i_mapping,i_btnode_cache}) to its original cache - * (dat->{i_mapping,i_btnode_cache}). + * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty + * flag of pages when it copies back pages from shadow cache to the + * original cache. * * 2) Some B-tree operations like insertion or deletion may dispose buffers * in dirty state, and this needs to cancel the dirty state of their pages. diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index e3726aca28ed..8350c2eaee75 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -738,15 +738,18 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, struct list_head *listp) { struct nilfs_inode_info *ii = NILFS_I(inode); - struct address_space *mapping = &ii->i_btnode_cache; + struct inode *btnc_inode = ii->i_assoc_inode; struct pagevec pvec; struct buffer_head *bh, *head; unsigned int i; pgoff_t index = 0; + if (!btnc_inode) + return; + pagevec_init(&pvec); - while (pagevec_lookup_tag(&pvec, mapping, &index, + while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index, PAGECACHE_TAG_DIRTY)) { for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); @@ -2415,7 +2418,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) continue; list_del_init(&ii->i_dirty); truncate_inode_pages(&ii->vfs_inode.i_data, 0); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index ab1a5e8467f2..010037d38973 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -158,7 +158,8 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) ii->i_bh = NULL; ii->i_state = 0; ii->i_cno = 0; - nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode); + ii->i_assoc_inode = NULL; + ii->i_bmap = &ii->i_bmap_data; return &ii->vfs_inode; } @@ -1378,8 +1379,6 @@ static void nilfs_inode_init_once(void *obj) #ifdef CONFIG_NILFS_XATTR init_rwsem(&ii->xattr_sem); #endif - address_space_init_once(&ii->i_btnode_cache); - ii->i_bmap = &ii->i_bmap_data; inode_init_once(&ii->vfs_inode); } diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index f0d6b54be412..765b50aeadd2 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -83,16 +83,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); inode = igrab(fsnotify_conn_inode(mark->connector)); if (inode) { - /* - * IN_ALL_EVENTS represents all of the mask bits - * that we expose to userspace. There is at - * least one bit (FS_EVENT_ON_CHILD) which is - * used only internally to the kernel. - */ - u32 mask = mark->mask & IN_ALL_EVENTS; - seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ", + seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ", inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, - mask, mark->ignored_mask); + inotify_mark_user_mask(mark)); show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 2007e3711916..8f00151eb731 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -22,6 +22,18 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct inotify_event_info, fse); } +/* + * INOTIFY_USER_FLAGS represents all of the mask bits that we expose to + * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is + * used only internally to the kernel. + */ +#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK) + +static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark) +{ + return fsn_mark->mask & INOTIFY_USER_MASK; +} + extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group); extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ca7b8185e260..9ea915e9d2a1 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -88,7 +88,7 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) mask |= FS_EVENT_ON_CHILD; /* mask off the flags used to open the fd */ - mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); + mask |= (arg & INOTIFY_USER_MASK); return mask; } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 8387937b9d01..5b44be5f93dd 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -430,7 +430,7 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + mutex_lock(&group->mark_mutex); fsnotify_detach_mark(mark); mutex_unlock(&group->mark_mutex); fsnotify_free_mark(mark); @@ -742,7 +742,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, * move marks to free to to_free list in one go and then free marks in * to_free list one by one. */ - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + mutex_lock(&group->mark_mutex); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { if ((1U << mark->connector->type) & type_mask) list_move(&mark->g_list, &to_free); @@ -751,7 +751,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, clear: while (1) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + mutex_lock(&group->mark_mutex); if (list_empty(head)) { mutex_unlock(&group->mark_mutex); break; diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index d563abc3e136..914e99173130 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, a = (ATTR_RECORD*)((u8*)ctx->attr + le32_to_cpu(ctx->attr->length)); for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { - if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + - le32_to_cpu(ctx->mrec->bytes_allocated)) + u8 *mrec_end = (u8 *)ctx->mrec + + le32_to_cpu(ctx->mrec->bytes_allocated); + u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) + + a->name_length * sizeof(ntfschar); + if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || + name_end > mrec_end) break; ctx->attr = a; if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c index 339f098d9592..8fa289de3939 100644 --- a/fs/ocfs2/dlmfs/userdlm.c +++ b/fs/ocfs2/dlmfs/userdlm.c @@ -435,6 +435,11 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres, } spin_lock(&lockres->l_lock); + if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { + spin_unlock(&lockres->l_lock); + status = -EAGAIN; + goto bail; + } /* We only compare against the currently granted level * here. If the lock is blocked waiting on a downconvert, @@ -597,7 +602,7 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) spin_lock(&lockres->l_lock); if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { spin_unlock(&lockres->l_lock); - return 0; + goto bail; } lockres->l_flags |= USER_LOCK_IN_TEARDOWN; @@ -611,12 +616,17 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) } if (lockres->l_ro_holders || lockres->l_ex_holders) { + lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN; spin_unlock(&lockres->l_lock); goto bail; } status = 0; if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { + /* + * lock is never requested, leave USER_LOCK_IN_TEARDOWN set + * to avoid new lock request coming in. + */ spin_unlock(&lockres->l_lock); goto bail; } @@ -627,6 +637,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK); if (status) { + spin_lock(&lockres->l_lock); + lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN; + lockres->l_flags &= ~USER_LOCK_BUSY; + spin_unlock(&lockres->l_lock); user_log_dlm_error("ocfs2_dlm_unlock", status, lockres); goto bail; } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7993d527edae..0a8cd8e59a92 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -279,7 +279,6 @@ enum ocfs2_mount_options OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ - OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ }; #define OCFS2_OSB_SOFT_RO 0x0001 @@ -675,8 +674,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) static inline int ocfs2_mount_local(struct ocfs2_super *osb) { - return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) - || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); + return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); } static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 4da0e4b1e79b..8caeceeaeda7 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -254,16 +254,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, int i, ret = -ENOSPC; if ((preferred >= 0) && (preferred < si->si_num_slots)) { - if (!si->si_slots[preferred].sl_valid || - !si->si_slots[preferred].sl_node_num) { + if (!si->si_slots[preferred].sl_valid) { ret = preferred; goto out; } } for(i = 0; i < si->si_num_slots; i++) { - if (!si->si_slots[i].sl_valid || - !si->si_slots[i].sl_node_num) { + if (!si->si_slots[i].sl_valid) { ret = i; break; } @@ -458,30 +456,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb) spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); - if (ocfs2_mount_local(osb)) - /* use slot 0 directly in local mode */ - slot = 0; - else { - /* search for ourselves first and take the slot if it already - * exists. Perhaps we need to mark this in a variable for our - * own journal recovery? Possibly not, though we certainly - * need to warn to the user */ - slot = __ocfs2_node_num_to_slot(si, osb->node_num); + /* search for ourselves first and take the slot if it already + * exists. Perhaps we need to mark this in a variable for our + * own journal recovery? Possibly not, though we certainly + * need to warn to the user */ + slot = __ocfs2_node_num_to_slot(si, osb->node_num); + if (slot < 0) { + /* if no slot yet, then just take 1st available + * one. */ + slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); if (slot < 0) { - /* if no slot yet, then just take 1st available - * one. */ - slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); - if (slot < 0) { - spin_unlock(&osb->osb_lock); - mlog(ML_ERROR, "no free slots available!\n"); - status = -EINVAL; - goto bail; - } - } else - printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " - "already allocated to this node!\n", - slot, osb->dev_str); - } + spin_unlock(&osb->osb_lock); + mlog(ML_ERROR, "no free slots available!\n"); + status = -EINVAL; + goto bail; + } + } else + printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " + "allocated to this node!\n", slot, osb->dev_str); ocfs2_set_slot(si, slot, osb->node_num); osb->slot_num = slot; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d7eb00f1ea8a..9eff7ffeda6e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -176,7 +176,6 @@ enum { Opt_dir_resv_level, Opt_journal_async_commit, Opt_err_cont, - Opt_nocluster, Opt_err, }; @@ -210,7 +209,6 @@ static const match_table_t tokens = { {Opt_dir_resv_level, "dir_resv_level=%u"}, {Opt_journal_async_commit, "journal_async_commit"}, {Opt_err_cont, "errors=continue"}, - {Opt_nocluster, "nocluster"}, {Opt_err, NULL} }; @@ -622,13 +620,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto out; } - tmp = OCFS2_MOUNT_NOCLUSTER; - if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { - ret = -EINVAL; - mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); - goto out; - } - tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | OCFS2_MOUNT_HB_NONE; if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { @@ -869,7 +860,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, } if (ocfs2_userspace_stack(osb) && - !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && strncmp(osb->osb_cluster_stack, mopt->cluster_stack, OCFS2_STACK_LABEL_LEN)) { mlog(ML_ERROR, @@ -1150,11 +1140,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : "ordered"); - if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && - !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) - printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " - "without cluster aware mode.\n", osb->dev_str); - atomic_set(&osb->vol_state, VOLUME_MOUNTED); wake_up(&osb->osb_mount_event); @@ -1461,9 +1446,6 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_journal_async_commit: mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; break; - case Opt_nocluster: - mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; - break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1575,9 +1557,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) seq_printf(s, ",journal_async_commit"); - if (opts & OCFS2_MOUNT_NOCLUSTER) - seq_printf(s, ",nocluster"); - return 0; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 09e4d8a499a3..5898761698c2 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -453,6 +453,9 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, proc_set_user(ent, (*parent)->uid, (*parent)->gid); ent->proc_dops = &proc_misc_dentry_ops; + /* Revalidate everything under /proc/${pid}/net */ + if ((*parent)->proc_dops == &proc_net_dentry_ops) + pde_force_lookup(ent); out: return ent; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 1aa9236bf1af..707477e27f83 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -362,6 +362,9 @@ static __net_init int proc_net_ns_init(struct net *net) proc_set_user(netd, uid, gid); + /* Seed dentry revalidation for /proc/${pid}/net */ + pde_force_lookup(netd); + err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 09fb8459bb5c..65f123d5809b 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -79,6 +79,7 @@ #include #include #include +#include #include "../internal.h" /* ugh */ #include @@ -427,9 +428,11 @@ EXPORT_SYMBOL(mark_info_dirty); int dquot_acquire(struct dquot *dquot) { int ret = 0, ret2 = 0; + unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); + memalloc = memalloc_nofs_save(); if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); if (ret < 0) @@ -460,6 +463,7 @@ int dquot_acquire(struct dquot *dquot) smp_mb__before_atomic(); set_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_iolock: + memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } @@ -471,9 +475,11 @@ EXPORT_SYMBOL(dquot_acquire); int dquot_commit(struct dquot *dquot) { int ret = 0; + unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); + memalloc = memalloc_nofs_save(); if (!clear_dquot_dirty(dquot)) goto out_lock; /* Inactive dquot can be only if there was error during read/init @@ -483,6 +489,7 @@ int dquot_commit(struct dquot *dquot) else ret = -EIO; out_lock: + memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } @@ -494,9 +501,11 @@ EXPORT_SYMBOL(dquot_commit); int dquot_release(struct dquot *dquot) { int ret = 0, ret2 = 0; + unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); + memalloc = memalloc_nofs_save(); /* Check whether we are not racing with some other dqget() */ if (dquot_is_busy(dquot)) goto out_dqlock; @@ -512,6 +521,7 @@ int dquot_release(struct dquot *dquot) } clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_dqlock: + memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } diff --git a/fs/remap_range.c b/fs/remap_range.c index e6099beefa97..e8e00e217d6c 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -71,7 +71,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, * Otherwise, make sure the count is also block-aligned, having * already confirmed the starting offsets' block alignment. */ - if (pos_in + count == size_in) { + if (pos_in + count == size_in && + (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) { bcount = ALIGN(size_in, bs) - pos_in; } else { if (!IS_ALIGNED(count, bs)) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 96ac7e562b87..fcca36bbd997 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -876,21 +876,18 @@ xfs_attr_node_hasname( state = xfs_da_state_alloc(args); if (statep != NULL) - *statep = NULL; + *statep = state; /* * Search to see if name exists, and get back a pointer to it. */ error = xfs_da3_node_lookup_int(state, &retval); - if (error) { - xfs_da_state_free(state); - return error; - } + if (error) + retval = error; - if (statep != NULL) - *statep = state; - else + if (!statep) xfs_da_state_free(state); + return retval; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index d9a692484eae..de9c27ef68d8 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -6229,6 +6229,11 @@ xfs_bmap_validate_extent( xfs_fsblock_t endfsb; bool isrt; + if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) + return __this_address; + if (irec->br_startoff + irec->br_blockcount <= irec->br_startoff) + return __this_address; + isrt = XFS_IS_REALTIME_INODE(ip); endfsb = irec->br_startblock + irec->br_blockcount - 1; if (isrt && whichfork == XFS_DATA_FORK) { diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 2d25bab68764..24c7d30e41df 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -353,20 +353,17 @@ xfs_btree_free_block( */ void xfs_btree_del_cursor( - xfs_btree_cur_t *cur, /* btree cursor */ - int error) /* del because of error */ + struct xfs_btree_cur *cur, /* btree cursor */ + int error) /* del because of error */ { - int i; /* btree level */ + int i; /* btree level */ /* - * Clear the buffer pointers, and release the buffers. - * If we're doing this in the face of an error, we - * need to make sure to inspect all of the entries - * in the bc_bufs array for buffers to be unlocked. - * This is because some of the btree code works from - * level n down to 0, and if we get an error along - * the way we won't have initialized all the entries - * down to 0. + * Clear the buffer pointers and release the buffers. If we're doing + * this because of an error, inspect all of the entries in the bc_bufs + * array for buffers to be unlocked. This is because some of the btree + * code works from level n down to 0, and if we get an error along the + * way we won't have initialized all the entries down to 0. */ for (i = 0; i < cur->bc_nlevels; i++) { if (cur->bc_bufs[i]) @@ -374,17 +371,17 @@ xfs_btree_del_cursor( else if (!error) break; } + /* - * Can't free a bmap cursor without having dealt with the - * allocated indirect blocks' accounting. - */ - ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || - cur->bc_ino.allocated == 0); - /* - * Free the cursor. + * If we are doing a BMBT update, the number of unaccounted blocks + * allocated during this cursor life time should be zero. If it's not + * zero, then we should be shut down or on our way to shutdown due to + * cancelling a dirty transaction on error. */ + ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 || + XFS_FORCED_SHUTDOWN(cur->bc_mp) || error != 0); if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) - kmem_free((void *)cur->bc_ops); + kmem_free(cur->bc_ops); kmem_cache_free(xfs_btree_cur_zone, cur); } @@ -2814,7 +2811,7 @@ xfs_btree_split_worker( struct xfs_btree_split_args *args = container_of(work, struct xfs_btree_split_args, work); unsigned long pflags; - unsigned long new_pflags = PF_MEMALLOC_NOFS; + unsigned long new_pflags = 0; /* * we are in a transaction context here, but may also be doing work @@ -2826,12 +2823,20 @@ xfs_btree_split_worker( new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; current_set_flags_nested(&pflags, new_pflags); + xfs_trans_set_context(args->cur->bc_tp); args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, args->key, args->curp, args->stat); + + xfs_trans_clear_context(args->cur->bc_tp); + current_restore_flags_nested(&pflags, new_pflags); + + /* + * Do not access args after complete() has run here. We don't own args + * and the owner may run and free args before we return here. + */ complete(args->done); - current_restore_flags_nested(&pflags, new_pflags); } /* diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index e55378640b05..d03e6098ded9 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -47,8 +47,6 @@ extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t ino, xfs_extlen_t tot); -extern bool xfs_dir2_sf_replace_needblock(struct xfs_inode *dp, - xfs_ino_t inum); extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t inum, xfs_extlen_t tot); diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 2463b5d73447..8c4f76bba88b 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -1018,7 +1018,7 @@ xfs_dir2_sf_removename( /* * Check whether the sf dir replace operation need more blocks. */ -bool +static bool xfs_dir2_sf_replace_needblock( struct xfs_inode *dp, xfs_ino_t inum) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 8bd00da6d2a4..2f46ef3800aa 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -414,7 +414,16 @@ struct xfs_log_dinode { /* start of the extended dinode, writable fields */ uint32_t di_crc; /* CRC of the inode */ uint64_t di_changecount; /* number of attribute changes */ - xfs_lsn_t di_lsn; /* flush sequence */ + + /* + * The LSN we write to this field during formatting is not a reflection + * of the current on-disk LSN. It should never be used for recovery + * sequencing, nor should it be recovered into the on-disk inode at all. + * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk() + * for details. + */ + xfs_lsn_t di_lsn; + uint64_t di_flags2; /* more random flags */ uint32_t di_cowextsize; /* basic cow extent size for file */ uint8_t di_pad2[12]; /* more padding for future expansion */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 5aeafa59ed27..66e8353da2f3 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -956,9 +956,19 @@ xfs_log_sb( struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *bp = xfs_trans_getsb(tp); - mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); - mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); - mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); + /* + * Lazy sb counters don't update the in-core superblock so do that now. + * If this is at unmount, the counters will be exactly correct, but at + * any other time they will only be ballpark correct because of + * reservations that have been taken out percpu counters. If we have an + * unclean shutdown, this will be corrected by log recovery rebuilding + * the counters from the AGF block counts. + */ + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) { + mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); + mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); + mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); + } xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 397d94775440..1ce06173c2f5 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -21,6 +21,7 @@ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */ typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */ typedef int64_t xfs_lsn_t; /* log sequence number */ +typedef int64_t xfs_csn_t; /* CIL sequence number */ typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ typedef uint32_t xfs_dahash_t; /* dir/attr hash value */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4304c6416fbb..953de843d9c3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc( * We hand off the transaction to the completion thread now, so * clear the flag here. */ - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + xfs_trans_clear_context(tp); return 0; } @@ -125,7 +125,7 @@ xfs_setfilesize_ioend( * thus we need to mark ourselves as being in a transaction manually. * Similarly for freeze protection. */ - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + xfs_trans_set_context(tp); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); /* we abort the update if there was an IO error */ @@ -145,6 +145,7 @@ xfs_end_ioend( struct iomap_ioend *ioend) { struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_mount *mp = ip->i_mount; xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; unsigned int nofs_flag; @@ -160,18 +161,26 @@ xfs_end_ioend( /* * Just clean up the in-memory strutures if the fs has been shut down. */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + if (XFS_FORCED_SHUTDOWN(mp)) { error = -EIO; goto done; } /* - * Clean up any COW blocks on an I/O error. + * Clean up all COW blocks and underlying data fork delalloc blocks on + * I/O error. The delalloc punch is required because this ioend was + * mapped to blocks in the COW fork and the associated pages are no + * longer dirty. If we don't remove delalloc blocks here, they become + * stale and can corrupt free space accounting on unmount. */ error = blk_status_to_errno(ioend->io_bio->bi_status); if (unlikely(error)) { - if (ioend->io_flags & IOMAP_F_SHARED) + if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); + xfs_bmap_punch_delalloc_range(ip, + XFS_B_TO_FSBT(mp, offset), + XFS_B_TO_FSB(mp, size)); + } goto done; } @@ -568,6 +577,12 @@ xfs_vm_writepage( { struct xfs_writepage_ctx wpc = { }; + if (WARN_ON_ONCE(current->journal_info)) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops); } @@ -578,6 +593,13 @@ xfs_vm_writepages( { struct xfs_writepage_ctx wpc = { }; + /* + * Writing back data in a transaction context can result in recursive + * transactions. This is bad, so issue a warning and get out of here. + */ + if (WARN_ON_ONCE(current->journal_info)) + return 0; + xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 0356f2e340a1..a3d5ecccfc2c 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -56,14 +56,12 @@ xfs_buf_log_format_size( } /* - * This returns the number of log iovecs needed to log the - * given buf log item. + * Return the number of log iovecs and space needed to log the given buf log + * item segment. * - * It calculates this as 1 iovec for the buf log format structure - * and 1 for each stretch of non-contiguous chunks to be logged. - * Contiguous chunks are logged in a single iovec. - * - * If the XFS_BLI_STALE flag has been set, then log nothing. + * It calculates this as 1 iovec for the buf log format structure and 1 for each + * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged + * in a single iovec. */ STATIC void xfs_buf_item_size_segment( @@ -119,11 +117,8 @@ xfs_buf_item_size_segment( } /* - * This returns the number of log iovecs needed to log the given buf log item. - * - * It calculates this as 1 iovec for the buf log format structure and 1 for each - * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged - * in a single iovec. + * Return the number of log iovecs and space needed to log the given buf log + * item. * * Discontiguous buffers need a format structure per region that is being * logged. This makes the changes in the buffer appear to log recovery as though @@ -133,7 +128,11 @@ xfs_buf_item_size_segment( * what ends up on disk. * * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log - * format structures. + * format structures. If the item has previously been logged and has dirty + * regions, we do not relog them in stale buffers. This has the effect of + * reducing the size of the relogged item by the amount of dirty data tracked + * by the log item. This can result in the committing transaction reducing the + * amount of space being consumed by the CIL. */ STATIC void xfs_buf_item_size( @@ -147,9 +146,9 @@ xfs_buf_item_size( ASSERT(atomic_read(&bip->bli_refcount) > 0); if (bip->bli_flags & XFS_BLI_STALE) { /* - * The buffer is stale, so all we need to log - * is the buf log format structure with the - * cancel flag in it. + * The buffer is stale, so all we need to log is the buf log + * format structure with the cancel flag in it as we are never + * going to replay the changes tracked in the log item. */ trace_xfs_buf_item_size_stale(bip); ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); @@ -164,9 +163,9 @@ xfs_buf_item_size( if (bip->bli_flags & XFS_BLI_ORDERED) { /* - * The buffer has been logged just to order it. - * It is not being included in the transaction - * commit, so no vectors are used at all. + * The buffer has been logged just to order it. It is not being + * included in the transaction commit, so no vectors are used at + * all. */ trace_xfs_buf_item_size_ordered(bip); *nvecs = XFS_LOG_VEC_ORDERED; @@ -394,17 +393,8 @@ xfs_buf_item_pin( } /* - * This is called to unpin the buffer associated with the buf log - * item which was previously pinned with a call to xfs_buf_item_pin(). - * - * Also drop the reference to the buf item for the current transaction. - * If the XFS_BLI_STALE flag is set and we are the last reference, - * then free up the buf log item and unlock the buffer. - * - * If the remove flag is set we are called from uncommit in the - * forced-shutdown path. If that is true and the reference count on - * the log item is going to drop to zero we need to free the item's - * descriptor in the transaction. + * This is called to unpin the buffer associated with the buf log item which + * was previously pinned with a call to xfs_buf_item_pin(). */ STATIC void xfs_buf_item_unpin( @@ -421,38 +411,35 @@ xfs_buf_item_unpin( trace_xfs_buf_item_unpin(bip); + /* + * Drop the bli ref associated with the pin and grab the hold required + * for the I/O simulation failure in the abort case. We have to do this + * before the pin count drops because the AIL doesn't acquire a bli + * reference. Therefore if the refcount drops to zero, the bli could + * still be AIL resident and the buffer submitted for I/O (and freed on + * completion) at any point before we return. This can be removed once + * the AIL properly holds a reference on the bli. + */ freed = atomic_dec_and_test(&bip->bli_refcount); - + if (freed && !stale && remove) + xfs_buf_hold(bp); if (atomic_dec_and_test(&bp->b_pin_count)) wake_up_all(&bp->b_waiters); - if (freed && stale) { + /* nothing to do but drop the pin count if the bli is active */ + if (!freed) + return; + + if (stale) { ASSERT(bip->bli_flags & XFS_BLI_STALE); ASSERT(xfs_buf_islocked(bp)); ASSERT(bp->b_flags & XBF_STALE); ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(list_empty(&lip->li_trans)); + ASSERT(!bp->b_transp); trace_xfs_buf_item_unpin_stale(bip); - if (remove) { - /* - * If we are in a transaction context, we have to - * remove the log item from the transaction as we are - * about to release our reference to the buffer. If we - * don't, the unlock that occurs later in - * xfs_trans_uncommit() will try to reference the - * buffer which we no longer have a hold on. - */ - if (!list_empty(&lip->li_trans)) - xfs_trans_del_item(lip); - - /* - * Since the transaction no longer refers to the buffer, - * the buffer should no longer refer to the transaction. - */ - bp->b_transp = NULL; - } - /* * If we get called here because of an IO error, we may or may * not have the item on the AIL. xfs_trans_ail_delete() will @@ -469,13 +456,13 @@ xfs_buf_item_unpin( ASSERT(bp->b_log_item == NULL); } xfs_buf_relse(bp); - } else if (freed && remove) { + } else if (remove) { /* * The buffer must be locked and held by the caller to simulate - * an async I/O failure. + * an async I/O failure. We acquired the hold for this case + * before the buffer was unpinned. */ xfs_buf_lock(bp); - xfs_buf_hold(bp); bp->b_flags |= XBF_ASYNC; xfs_buf_ioend_fail(bp); } @@ -633,7 +620,7 @@ xfs_buf_item_release( STATIC void xfs_buf_item_committing( struct xfs_log_item *lip, - xfs_lsn_t commit_lsn) + xfs_csn_t seq) { return xfs_buf_item_release(lip); } diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d44e8b4a3391..b374c9cee117 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn( switch (magicda) { case XFS_DIR3_LEAF1_MAGIC: case XFS_DIR3_LEAFN_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: case XFS_DA3_NODE_MAGIC: lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; @@ -805,7 +806,7 @@ xlog_recover_get_buf_lsn( } if (lsn != (xfs_lsn_t)-1) { - if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) + if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid)) goto recover_immediately; return lsn; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 1d95ed387d66..80c4579d6835 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -500,6 +500,42 @@ xfs_dquot_alloc( return dqp; } +/* Check the ondisk dquot's id and type match what the incore dquot expects. */ +static bool +xfs_dquot_check_type( + struct xfs_dquot *dqp, + struct xfs_disk_dquot *ddqp) +{ + uint8_t ddqp_type; + uint8_t dqp_type; + + ddqp_type = ddqp->d_type & XFS_DQTYPE_REC_MASK; + dqp_type = xfs_dquot_type(dqp); + + if (be32_to_cpu(ddqp->d_id) != dqp->q_id) + return false; + + /* + * V5 filesystems always expect an exact type match. V4 filesystems + * expect an exact match for user dquots and for non-root group and + * project dquots. + */ + if (xfs_sb_version_hascrc(&dqp->q_mount->m_sb) || + dqp_type == XFS_DQTYPE_USER || dqp->q_id != 0) + return ddqp_type == dqp_type; + + /* + * V4 filesystems support either group or project quotas, but not both + * at the same time. The non-user quota file can be switched between + * group and project quota uses depending on the mount options, which + * means that we can encounter the other type when we try to load quota + * defaults. Quotacheck will soon reset the the entire quota file + * (including the root dquot) anyway, but don't log scary corruption + * reports to dmesg. + */ + return ddqp_type == XFS_DQTYPE_GROUP || ddqp_type == XFS_DQTYPE_PROJ; +} + /* Copy the in-core quota fields in from the on-disk buffer. */ STATIC int xfs_dquot_from_disk( @@ -512,8 +548,7 @@ xfs_dquot_from_disk( * Ensure that we got the type and ID we were looking for. * Everything else was checked by the dquot buffer verifier. */ - if ((ddqp->d_type & XFS_DQTYPE_REC_MASK) != xfs_dquot_type(dqp) || - be32_to_cpu(ddqp->d_id) != dqp->q_id) { + if (!xfs_dquot_check_type(dqp, ddqp)) { xfs_alert_tag(bp->b_mount, XFS_PTAG_VERIFIER_ERROR, "Metadata corruption detected at %pS, quota %u", __this_address, dqp->q_id); diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 8c1fdf37ee8f..8ed47b739b6c 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -188,7 +188,7 @@ xfs_qm_dquot_logitem_release( STATIC void xfs_qm_dquot_logitem_committing( struct xfs_log_item *lip, - xfs_lsn_t commit_lsn) + xfs_csn_t seq) { return xfs_qm_dquot_logitem_release(lip); } diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 7f6e20899473..f9e2f606b5b8 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -293,6 +293,8 @@ xfs_errortag_add( struct xfs_mount *mp, unsigned int error_tag) { + BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX); + if (error_tag >= XFS_ERRTAG_MAX) return -EINVAL; diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 6c11bfc3d452..17aa77fbb5ad 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -482,7 +482,7 @@ xfs_extent_free_finish_item( free->xefi_startblock, free->xefi_blockcount, &free->xefi_oinfo, free->xefi_skip_discard); - kmem_free(free); + kmem_cache_free(xfs_bmap_free_item_zone, free); return error; } @@ -502,7 +502,7 @@ xfs_extent_free_cancel_item( struct xfs_extent_free_item *free; free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_free(free); + kmem_cache_free(xfs_bmap_free_item_zone, free); } const struct xfs_defer_op_type xfs_extent_free_defer_type = { @@ -564,7 +564,7 @@ xfs_agfl_free_finish_item( extp->ext_len = free->xefi_blockcount; efdp->efd_next_extent++; - kmem_free(free); + kmem_cache_free(xfs_bmap_free_item_zone, free); return error; } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 111fe73bb8a7..c9045809b8c4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -118,6 +118,54 @@ xfs_dir_fsync( return xfs_log_force_inode(ip); } +static xfs_csn_t +xfs_fsync_seq( + struct xfs_inode *ip, + bool datasync) +{ + if (!xfs_ipincount(ip)) + return 0; + if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) + return 0; + return ip->i_itemp->ili_commit_seq; +} + +/* + * All metadata updates are logged, which means that we just have to flush the + * log up to the latest LSN that touched the inode. + * + * If we have concurrent fsync/fdatasync() calls, we need them to all block on + * the log force before we clear the ili_fsync_fields field. This ensures that + * we don't get a racing sync operation that does not wait for the metadata to + * hit the journal before returning. If we race with clearing ili_fsync_fields, + * then all that will happen is the log force will do nothing as the lsn will + * already be on disk. We can't race with setting ili_fsync_fields because that + * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock + * shared until after the ili_fsync_fields is cleared. + */ +static int +xfs_fsync_flush_log( + struct xfs_inode *ip, + bool datasync, + int *log_flushed) +{ + int error = 0; + xfs_csn_t seq; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + seq = xfs_fsync_seq(ip, datasync); + if (seq) { + error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, + log_flushed); + + spin_lock(&ip->i_itemp->ili_lock); + ip->i_itemp->ili_fsync_fields = 0; + spin_unlock(&ip->i_itemp->ili_lock); + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + return error; +} + STATIC int xfs_file_fsync( struct file *file, @@ -125,13 +173,10 @@ xfs_file_fsync( loff_t end, int datasync) { - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_inode_log_item *iip = ip->i_itemp; + struct xfs_inode *ip = XFS_I(file->f_mapping->host); struct xfs_mount *mp = ip->i_mount; int error = 0; int log_flushed = 0; - xfs_lsn_t lsn = 0; trace_xfs_file_fsync(ip); @@ -155,33 +200,7 @@ xfs_file_fsync( else if (mp->m_logdev_targp != mp->m_ddev_targp) xfs_blkdev_issue_flush(mp->m_ddev_targp); - /* - * All metadata updates are logged, which means that we just have to - * flush the log up to the latest LSN that touched the inode. If we have - * concurrent fsync/fdatasync() calls, we need them to all block on the - * log force before we clear the ili_fsync_fields field. This ensures - * that we don't get a racing sync operation that does not wait for the - * metadata to hit the journal before returning. If we race with - * clearing the ili_fsync_fields, then all that will happen is the log - * force will do nothing as the lsn will already be on disk. We can't - * race with setting ili_fsync_fields because that is done under - * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared - * until after the ili_fsync_fields is cleared. - */ - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_ipincount(ip)) { - if (!datasync || - (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) - lsn = iip->ili_last_lsn; - } - - if (lsn) { - error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); - spin_lock(&iip->ili_lock); - iip->ili_fsync_fields = 0; - spin_unlock(&iip->ili_lock); - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); + error = xfs_fsync_flush_log(ip, datasync, &log_flushed); /* * If we only have a single device, and the log force about was diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2bfbcf28b1bd..1f61e085676b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2754,7 +2754,7 @@ xfs_iunpin( trace_xfs_inode_unpin_nowait(ip, _RET_IP_); /* Give the log a push to start the unpinning I/O */ - xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL); + xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL); } @@ -3152,7 +3152,7 @@ xfs_rename( struct xfs_trans *tp; struct xfs_inode *wip = NULL; /* whiteout inode */ struct xfs_inode *inodes[__XFS_SORT_INODES]; - struct xfs_buf *agibp; + int i; int num_inodes = __XFS_SORT_INODES; bool new_parent = (src_dp != target_dp); bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); @@ -3170,7 +3170,6 @@ xfs_rename( * appropriately. */ if (flags & RENAME_WHITEOUT) { - ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); error = xfs_rename_alloc_whiteout(target_dp, &wip); if (error) return error; @@ -3265,6 +3264,30 @@ xfs_rename( } } + /* + * Lock the AGI buffers we need to handle bumping the nlink of the + * whiteout inode off the unlinked list and to handle dropping the + * nlink of the target inode. Per locking order rules, do this in + * increasing AG order and before directory block allocation tries to + * grab AGFs because we grab AGIs before AGFs. + * + * The (vfs) caller must ensure that if src is a directory then + * target_ip is either null or an empty directory. + */ + for (i = 0; i < num_inodes && inodes[i] != NULL; i++) { + if (inodes[i] == wip || + (inodes[i] == target_ip && + (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) { + struct xfs_buf *bp; + xfs_agnumber_t agno; + + agno = XFS_INO_TO_AGNO(mp, inodes[i]->i_ino); + error = xfs_read_agi(mp, tp, agno, &bp); + if (error) + goto out_trans_cancel; + } + } + /* * Directory entry creation below may acquire the AGF. Remove * the whiteout from the unlinked list first to preserve correct @@ -3317,22 +3340,6 @@ xfs_rename( * In case there is already an entry with the same * name at the destination directory, remove it first. */ - - /* - * Check whether the replace operation will need to allocate - * blocks. This happens when the shortform directory lacks - * space and we have to convert it to a block format directory. - * When more blocks are necessary, we must lock the AGI first - * to preserve locking order (AGI -> AGF). - */ - if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) { - error = xfs_read_agi(mp, tp, - XFS_INO_TO_AGNO(mp, target_ip->i_ino), - &agibp); - if (error) - goto out_trans_cancel; - } - error = xfs_dir_replace(tp, target_dp, target_name, src_ip->i_ino, spaceres); if (error) @@ -3709,16 +3716,16 @@ int xfs_log_force_inode( struct xfs_inode *ip) { - xfs_lsn_t lsn = 0; + xfs_csn_t seq = 0; xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) - lsn = ip->i_itemp->ili_last_lsn; + seq = ip->i_itemp->ili_commit_seq; xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (!lsn) + if (!seq) return 0; - return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL); + return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL); } /* diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 17e20a6d8b4e..3aba4559469f 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -28,6 +28,20 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_inode_log_item, ili_item); } +/* + * The logged size of an inode fork is always the current size of the inode + * fork. This means that when an inode fork is relogged, the size of the logged + * region is determined by the current state, not the combination of the + * previously logged state + the current state. This is different relogging + * behaviour to most other log items which will retain the size of the + * previously logged changes when smaller regions are relogged. + * + * Hence operations that remove data from the inode fork (e.g. shortform + * dir/attr remove, extent form extent removal, etc), the size of the relogged + * inode gets -smaller- rather than stays the same size as the previously logged + * size and this can result in the committing transaction reducing the amount of + * space being consumed by the CIL. + */ STATIC void xfs_inode_item_data_fork_size( struct xfs_inode_log_item *iip, @@ -603,9 +617,9 @@ xfs_inode_item_committed( STATIC void xfs_inode_item_committing( struct xfs_log_item *lip, - xfs_lsn_t commit_lsn) + xfs_csn_t seq) { - INODE_ITEM(lip)->ili_last_lsn = commit_lsn; + INODE_ITEM(lip)->ili_commit_seq = seq; return xfs_inode_item_release(lip); } diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 4b926e32831c..403b45ab9aa2 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -33,7 +33,7 @@ struct xfs_inode_log_item { unsigned int ili_fields; /* fields to be logged */ unsigned int ili_fsync_fields; /* logged since last fsync */ xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ - xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ + xfs_csn_t ili_commit_seq; /* last transaction commit */ }; static inline int xfs_inode_clean(struct xfs_inode *ip) diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index cb44f7653f03..538724f9f85c 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts( STATIC void xfs_log_dinode_to_disk( struct xfs_log_dinode *from, - struct xfs_dinode *to) + struct xfs_dinode *to, + xfs_lsn_t lsn) { to->di_magic = cpu_to_be16(from->di_magic); to->di_mode = cpu_to_be16(from->di_mode); @@ -182,7 +183,7 @@ xfs_log_dinode_to_disk( to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(from->di_ino); - to->di_lsn = cpu_to_be64(from->di_lsn); + to->di_lsn = cpu_to_be64(lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &from->di_uuid); to->di_flushiter = 0; @@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2( } /* - * If the inode has an LSN in it, recover the inode only if it's less - * than the lsn of the transaction we are replaying. Note: we still - * need to replay an owner change even though the inode is more recent - * than the transaction as there is no guarantee that all the btree - * blocks are more recent than this transaction, too. + * If the inode has an LSN in it, recover the inode only if the on-disk + * inode's LSN is older than the lsn of the transaction we are + * replaying. We can have multiple checkpoints with the same start LSN, + * so the current LSN being equal to the on-disk LSN doesn't necessarily + * mean that the on-disk inode is more recent than the change being + * replayed. + * + * We must check the current_lsn against the on-disk inode + * here because the we can't trust the log dinode to contain a valid LSN + * (see comment below before replaying the log dinode for details). + * + * Note: we still need to replay an owner change even though the inode + * is more recent than the transaction as there is no guarantee that all + * the btree blocks are more recent than this transaction, too. */ if (dip->di_version >= 3) { xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) { trace_xfs_log_recover_inode_skip(log, in_f); error = 0; goto out_owner_change; @@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2( goto out_release; } - /* recover the log dinode inode into the on disk inode */ - xfs_log_dinode_to_disk(ldip, dip); + /* + * Recover the log dinode inode into the on disk inode. + * + * The LSN in the log dinode is garbage - it can be zero or reflect + * stale in-memory runtime state that isn't coherent with the changes + * logged in this transaction or the changes written to the on-disk + * inode. Hence we write the current lSN into the inode because that + * matches what xfs_iflush() would write inode the inode when flushing + * the changes in this transaction. + */ + xfs_log_dinode_to_disk(ldip, dip, current_lsn); fields = in_f->ilf_fields; if (fields & XFS_ILOG_DEV) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7b9ff824e82d..74bc2beadc23 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -870,6 +870,9 @@ xfs_buffered_write_iomap_begin( int allocfork = XFS_DATA_FORK; int error = 0; + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + /* we can't use delayed allocations when using extent size hints */ if (xfs_get_extsz_hint(ip)) return xfs_direct_write_iomap_begin(inode, offset, count, diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 2a45138831e3..eae3aff9bc97 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -363,7 +363,7 @@ xfs_iwalk_run_callbacks( /* Delete cursor but remember the last record we cached... */ xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); irec = &iwag->recs[iwag->nr_recs - 1]; - ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK); + ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK); error = xfs_iwalk_ag_recs(iwag); if (error) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index fa2d05e65ff1..22d7d74231d4 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -347,6 +347,25 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type) tic->t_res_num++; } +bool +xfs_log_writable( + struct xfs_mount *mp) +{ + /* + * Never write to the log on norecovery mounts, if the block device is + * read-only, or if the filesystem is shutdown. Read-only mounts still + * allow internal writes for log recovery and unmount purposes, so don't + * restrict that case here. + */ + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + return false; + if (xfs_readonly_buftarg(mp->m_log->l_targ)) + return false; + if (XFS_FORCED_SHUTDOWN(mp)) + return false; + return true; +} + /* * Replenish the byte reservation required by moving the grant write head. */ @@ -746,6 +765,9 @@ xfs_log_mount_finish( if (readonly) mp->m_flags |= XFS_MOUNT_RDONLY; + /* Make sure the log is dead if we're returning failure. */ + ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR)); + return error; } @@ -886,15 +908,8 @@ xfs_log_unmount_write( { struct xlog *log = mp->m_log; - /* - * Don't write out unmount record on norecovery mounts or ro devices. - * Or, if we are doing a forced umount (typically because of IO errors). - */ - if (mp->m_flags & XFS_MOUNT_NORECOVERY || - xfs_readonly_buftarg(log->l_targ)) { - ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); + if (!xfs_log_writable(mp)) return; - } xfs_log_force(mp, XFS_LOG_SYNC); @@ -3198,14 +3213,13 @@ xfs_log_force( } static int -__xfs_log_force_lsn( - struct xfs_mount *mp, +xlog_force_lsn( + struct xlog *log, xfs_lsn_t lsn, uint flags, int *log_flushed, bool already_slept) { - struct xlog *log = mp->m_log; struct xlog_in_core *iclog; spin_lock(&log->l_icloglock); @@ -3238,8 +3252,6 @@ __xfs_log_force_lsn( if (!already_slept && (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC || iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) { - XFS_STATS_INC(mp, xs_log_force_sleep); - xlog_wait(&iclog->ic_prev->ic_write_wait, &log->l_icloglock); return -EAGAIN; @@ -3277,25 +3289,29 @@ __xfs_log_force_lsn( * to disk, that thread will wake up all threads waiting on the queue. */ int -xfs_log_force_lsn( +xfs_log_force_seq( struct xfs_mount *mp, - xfs_lsn_t lsn, + xfs_csn_t seq, uint flags, int *log_flushed) { + struct xlog *log = mp->m_log; + xfs_lsn_t lsn; int ret; - ASSERT(lsn != 0); + ASSERT(seq != 0); XFS_STATS_INC(mp, xs_log_force); - trace_xfs_log_force(mp, lsn, _RET_IP_); + trace_xfs_log_force(mp, seq, _RET_IP_); - lsn = xlog_cil_force_lsn(mp->m_log, lsn); + lsn = xlog_cil_force_seq(log, seq); if (lsn == NULLCOMMITLSN) return 0; - ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false); - if (ret == -EAGAIN) - ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true); + ret = xlog_force_lsn(log, lsn, flags, log_flushed, false); + if (ret == -EAGAIN) { + XFS_STATS_INC(mp, xs_log_force_sleep); + ret = xlog_force_lsn(log, lsn, flags, log_flushed, true); + } return ret; } diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 58c3fcbec94a..a1089f8b7169 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -106,7 +106,7 @@ struct xfs_item_ops; struct xfs_trans; int xfs_log_force(struct xfs_mount *mp, uint flags); -int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags, +int xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags, int *log_forced); int xfs_log_mount(struct xfs_mount *mp, struct xfs_buftarg *log_target, @@ -127,12 +127,11 @@ int xfs_log_reserve(struct xfs_mount *mp, int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); void xfs_log_unmount(struct xfs_mount *mp); int xfs_log_force_umount(struct xfs_mount *mp, int logerror); +bool xfs_log_writable(struct xfs_mount *mp); struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); void xfs_log_ticket_put(struct xlog_ticket *ticket); -void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_lsn_t *commit_lsn, bool regrant); void xlog_cil_process_committed(struct list_head *list); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b0ef071b3cb5..fbe160d5e9b9 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -668,9 +668,14 @@ xlog_cil_push_work( ASSERT(push_seq <= ctx->sequence); /* - * Wake up any background push waiters now this context is being pushed. + * As we are about to switch to a new, empty CIL context, we no longer + * need to throttle tasks on CIL space overruns. Wake any waiters that + * the hard push throttle may have caught so they can start committing + * to the new context. The ctx->xc_push_lock provides the serialisation + * necessary for safely using the lockless waitqueue_active() check in + * this context. */ - if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) + if (waitqueue_active(&cil->xc_push_wait)) wake_up_all(&cil->xc_push_wait); /* @@ -772,7 +777,7 @@ xlog_cil_push_work( * that higher sequences will wait for us to write out a commit record * before they do. * - * xfs_log_force_lsn requires us to mirror the new sequence into the cil + * xfs_log_force_seq requires us to mirror the new sequence into the cil * structure atomically with the addition of this sequence to the * committing list. This also ensures that we can do unlocked checks * against the current sequence in log forces without risking @@ -907,7 +912,7 @@ xlog_cil_push_background( ASSERT(!list_empty(&cil->xc_cil)); /* - * don't do a background push if we haven't used up all the + * Don't do a background push if we haven't used up all the * space available yet. */ if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) { @@ -931,9 +936,16 @@ xlog_cil_push_background( /* * If we are well over the space limit, throttle the work that is being - * done until the push work on this context has begun. + * done until the push work on this context has begun. Enforce the hard + * throttle on all transaction commits once it has been activated, even + * if the committing transactions have resulted in the space usage + * dipping back down under the hard limit. + * + * The ctx->xc_push_lock provides the serialisation necessary for safely + * using the lockless waitqueue_active() check in this context. */ - if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) { + if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log) || + waitqueue_active(&cil->xc_push_wait)) { trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); ASSERT(cil->xc_ctx->space_used < log->l_logsize); xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock); @@ -1008,16 +1020,14 @@ xlog_cil_empty( * allowed again. */ void -xfs_log_commit_cil( - struct xfs_mount *mp, +xlog_cil_commit( + struct xlog *log, struct xfs_trans *tp, - xfs_lsn_t *commit_lsn, + xfs_csn_t *commit_seq, bool regrant) { - struct xlog *log = mp->m_log; struct xfs_cil *cil = log->l_cilp; struct xfs_log_item *lip, *next; - xfs_lsn_t xc_commit_lsn; /* * Do all necessary memory allocation before we lock the CIL. @@ -1031,10 +1041,6 @@ xfs_log_commit_cil( xlog_cil_insert_items(log, tp); - xc_commit_lsn = cil->xc_ctx->sequence; - if (commit_lsn) - *commit_lsn = xc_commit_lsn; - if (regrant && !XLOG_FORCED_SHUTDOWN(log)) xfs_log_ticket_regrant(log, tp->t_ticket); else @@ -1057,8 +1063,10 @@ xfs_log_commit_cil( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); if (lip->li_ops->iop_committing) - lip->li_ops->iop_committing(lip, xc_commit_lsn); + lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence); } + if (commit_seq) + *commit_seq = cil->xc_ctx->sequence; /* xlog_cil_push_background() releases cil->xc_ctx_lock */ xlog_cil_push_background(log); @@ -1075,9 +1083,9 @@ xfs_log_commit_cil( * iclog flush is necessary following this call. */ xfs_lsn_t -xlog_cil_force_lsn( +xlog_cil_force_seq( struct xlog *log, - xfs_lsn_t sequence) + xfs_csn_t sequence) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx; @@ -1171,23 +1179,19 @@ xlog_cil_force_lsn( */ bool xfs_log_item_in_current_chkpt( - struct xfs_log_item *lip) + struct xfs_log_item *lip) { - struct xfs_cil_ctx *ctx; + struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp; if (list_empty(&lip->li_cil)) return false; - ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; - /* * li_seq is written on the first commit of a log item to record the * first checkpoint it is written to. Hence if it is different to the * current sequence, we're in a new checkpoint. */ - if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0) - return false; - return true; + return lip->li_seq == READ_ONCE(cil->xc_current_sequence); } /* diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 1c6fdbf3d506..42cd1602ac25 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -230,7 +230,7 @@ struct xfs_cil; struct xfs_cil_ctx { struct xfs_cil *cil; - xfs_lsn_t sequence; /* chkpt sequence # */ + xfs_csn_t sequence; /* chkpt sequence # */ xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ struct xlog_ticket *ticket; /* chkpt ticket */ @@ -268,10 +268,10 @@ struct xfs_cil { struct xfs_cil_ctx *xc_ctx; spinlock_t xc_push_lock ____cacheline_aligned_in_smp; - xfs_lsn_t xc_push_seq; + xfs_csn_t xc_push_seq; struct list_head xc_committing; wait_queue_head_t xc_commit_wait; - xfs_lsn_t xc_current_sequence; + xfs_csn_t xc_current_sequence; struct work_struct xc_push_work; wait_queue_head_t xc_push_wait; /* background push throttle */ } ____cacheline_aligned_in_smp; @@ -547,19 +547,18 @@ int xlog_cil_init(struct xlog *log); void xlog_cil_init_post_recovery(struct xlog *log); void xlog_cil_destroy(struct xlog *log); bool xlog_cil_empty(struct xlog *log); +void xlog_cil_commit(struct xlog *log, struct xfs_trans *tp, + xfs_csn_t *commit_seq, bool regrant); /* * CIL force routines */ -xfs_lsn_t -xlog_cil_force_lsn( - struct xlog *log, - xfs_lsn_t sequence); +xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence); static inline void xlog_cil_force(struct xlog *log) { - xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); + xlog_cil_force_seq(log, log->l_cilp->xc_current_sequence); } /* diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 87886b7f77da..69408782019e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2457,8 +2457,10 @@ xlog_finish_defer_ops( error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres, dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp); - if (error) + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); return error; + } /* * Transfer to this new transaction all the dfops we captured @@ -3454,6 +3456,7 @@ xlog_recover_finish( * this) before we get around to xfs_log_mount_cancel. */ xlog_recover_cancel_intents(log); + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); xfs_alert(log->l_mp, "Failed to recover intents"); return error; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 7110507a2b6b..a2a5a0fd9233 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -631,6 +631,47 @@ xfs_check_summary_counts( return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount); } +/* + * Flush and reclaim dirty inodes in preparation for unmount. Inodes and + * internal inode structures can be sitting in the CIL and AIL at this point, + * so we need to unpin them, write them back and/or reclaim them before unmount + * can proceed. + * + * An inode cluster that has been freed can have its buffer still pinned in + * memory because the transaction is still sitting in a iclog. The stale inodes + * on that buffer will be pinned to the buffer until the transaction hits the + * disk and the callbacks run. Pushing the AIL will skip the stale inodes and + * may never see the pinned buffer, so nothing will push out the iclog and + * unpin the buffer. + * + * Hence we need to force the log to unpin everything first. However, log + * forces don't wait for the discards they issue to complete, so we have to + * explicitly wait for them to complete here as well. + * + * Then we can tell the world we are unmounting so that error handling knows + * that the filesystem is going away and we should error out anything that we + * have been retrying in the background. This will prevent never-ending + * retries in AIL pushing from hanging the unmount. + * + * Finally, we can push the AIL to clean all the remaining dirty objects, then + * reclaim the remaining inodes that are still in memory at this point in time. + */ +static void +xfs_unmount_flush_inodes( + struct xfs_mount *mp) +{ + xfs_log_force(mp, XFS_LOG_SYNC); + xfs_extent_busy_wait_all(mp); + flush_workqueue(xfs_discard_wq); + + mp->m_flags |= XFS_MOUNT_UNMOUNTING; + + xfs_ail_push_all_sync(mp->m_ail); + cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_reclaim_inodes(mp); + xfs_health_unmount(mp); +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -927,9 +968,17 @@ xfs_mountfs( /* * Finish recovering the file system. This part needed to be delayed * until after the root and real-time bitmap inodes were consistently - * read in. + * read in. Temporarily create per-AG space reservations for metadata + * btree shape changes because space freeing transactions (for inode + * inactivation) require the per-AG reservation in lieu of reserving + * blocks. */ + error = xfs_fs_reserve_ag_blocks(mp); + if (error && error == -ENOSPC) + xfs_warn(mp, + "ENOSPC reserving per-AG metadata pool, log recovery may fail."); error = xfs_log_mount_finish(mp); + xfs_fs_unreserve_ag_blocks(mp); if (error) { xfs_warn(mp, "log mount finish failed"); goto out_rtunmount; @@ -1005,7 +1054,7 @@ xfs_mountfs( /* Clean out dquots that might be in memory after quotacheck. */ xfs_qm_unmount(mp); /* - * Cancel all delayed reclaim work and reclaim the inodes directly. + * Flush all inode reclamation work and flush the log. * We have to do this /after/ rtunmount and qm_unmount because those * two will have scheduled delayed reclaim for the rt/quota inodes. * @@ -1015,11 +1064,8 @@ xfs_mountfs( * qm_unmount_quotas and therefore rely on qm_unmount to release the * quota inodes. */ - cancel_delayed_work_sync(&mp->m_reclaim_work); - xfs_reclaim_inodes(mp); - xfs_health_unmount(mp); + xfs_unmount_flush_inodes(mp); out_log_dealloc: - mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) @@ -1060,47 +1106,7 @@ xfs_unmountfs( xfs_rtunmount_inodes(mp); xfs_irele(mp->m_rootip); - /* - * We can potentially deadlock here if we have an inode cluster - * that has been freed has its buffer still pinned in memory because - * the transaction is still sitting in a iclog. The stale inodes - * on that buffer will be pinned to the buffer until the - * transaction hits the disk and the callbacks run. Pushing the AIL will - * skip the stale inodes and may never see the pinned buffer, so - * nothing will push out the iclog and unpin the buffer. Hence we - * need to force the log here to ensure all items are flushed into the - * AIL before we go any further. - */ - xfs_log_force(mp, XFS_LOG_SYNC); - - /* - * Wait for all busy extents to be freed, including completion of - * any discard operation. - */ - xfs_extent_busy_wait_all(mp); - flush_workqueue(xfs_discard_wq); - - /* - * We now need to tell the world we are unmounting. This will allow - * us to detect that the filesystem is going away and we should error - * out anything that we have been retrying in the background. This will - * prevent neverending retries in AIL pushing from hanging the unmount. - */ - mp->m_flags |= XFS_MOUNT_UNMOUNTING; - - /* - * Flush all pending changes from the AIL. - */ - xfs_ail_push_all_sync(mp->m_ail); - - /* - * Reclaim all inodes. At this point there should be no dirty inodes and - * none should be pinned or locked. Stop background inode reclaim here - * if it is still running. - */ - cancel_delayed_work_sync(&mp->m_reclaim_work); - xfs_reclaim_inodes(mp); - xfs_health_unmount(mp); + xfs_unmount_flush_inodes(mp); xfs_qm_unmount(mp); @@ -1176,8 +1182,7 @@ xfs_fs_writable( int xfs_log_sbcount(xfs_mount_t *mp) { - /* allow this to proceed during the freeze sequence... */ - if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) + if (!xfs_log_writable(mp)) return 0; /* diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b2a9abee8b2b..64e5da33733b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1785,6 +1785,29 @@ xfs_qm_vop_chown( xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); + /* + * Back when we made quota reservations for the chown, we reserved the + * ondisk blocks + delalloc blocks with the new dquot. Now that we've + * switched the dquots, decrease the new dquot's block reservation + * (having already bumped up the real counter) so that we don't have + * any reservation to give back when we commit. + */ + xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS, + -ip->i_delayed_blks); + + /* + * Give the incore reservation for delalloc blocks back to the old + * dquot. We don't normally handle delalloc quota reservations + * transactionally, so just lock the dquot and subtract from the + * reservation. Dirty the transaction because it's too late to turn + * back now. + */ + tp->t_flags |= XFS_TRANS_DIRTY; + xfs_dqlock(prevdq); + ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks); + prevdq->q_blk.reserved -= ip->i_delayed_blks; + xfs_dqunlock(prevdq); + /* * Take an extra reference, because the inode is going to keep * this dquot pointer even after the trans_commit. @@ -1807,84 +1830,39 @@ xfs_qm_vop_chown_reserve( uint flags) { struct xfs_mount *mp = ip->i_mount; - uint64_t delblks; unsigned int blkflags; - struct xfs_dquot *udq_unres = NULL; - struct xfs_dquot *gdq_unres = NULL; - struct xfs_dquot *pdq_unres = NULL; struct xfs_dquot *udq_delblks = NULL; struct xfs_dquot *gdq_delblks = NULL; struct xfs_dquot *pdq_delblks = NULL; - int error; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - delblks = ip->i_delayed_blks; blkflags = XFS_IS_REALTIME_INODE(ip) ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; if (XFS_IS_UQUOTA_ON(mp) && udqp && - i_uid_read(VFS_I(ip)) != udqp->q_id) { + i_uid_read(VFS_I(ip)) != udqp->q_id) udq_delblks = udqp; - /* - * If there are delayed allocation blocks, then we have to - * unreserve those from the old dquot, and add them to the - * new dquot. - */ - if (delblks) { - ASSERT(ip->i_udquot); - udq_unres = ip->i_udquot; - } - } + if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp && - i_gid_read(VFS_I(ip)) != gdqp->q_id) { + i_gid_read(VFS_I(ip)) != gdqp->q_id) gdq_delblks = gdqp; - if (delblks) { - ASSERT(ip->i_gdquot); - gdq_unres = ip->i_gdquot; - } - } if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp && - ip->i_d.di_projid != pdqp->q_id) { + ip->i_d.di_projid != pdqp->q_id) pdq_delblks = pdqp; - if (delblks) { - ASSERT(ip->i_pdquot); - pdq_unres = ip->i_pdquot; - } - } - - error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, - udq_delblks, gdq_delblks, pdq_delblks, - ip->i_d.di_nblocks, 1, flags | blkflags); - if (error) - return error; /* - * Do the delayed blks reservations/unreservations now. Since, these - * are done without the help of a transaction, if a reservation fails - * its previous reservations won't be automatically undone by trans - * code. So, we have to do it manually here. + * Reserve enough quota to handle blocks on disk and reserved for a + * delayed allocation. We'll actually transfer the delalloc + * reservation between dquots at chown time, even though that part is + * only semi-transactional. */ - if (delblks) { - /* - * Do the reservations first. Unreservation can't fail. - */ - ASSERT(udq_delblks || gdq_delblks || pdq_delblks); - ASSERT(udq_unres || gdq_unres || pdq_unres); - error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - udq_delblks, gdq_delblks, pdq_delblks, - (xfs_qcnt_t)delblks, 0, flags | blkflags); - if (error) - return error; - xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - udq_unres, gdq_unres, pdq_unres, - -((xfs_qcnt_t)delblks), 0, blkflags); - } - - return 0; + return xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, udq_delblks, + gdq_delblks, pdq_delblks, + ip->i_d.di_nblocks + ip->i_delayed_blks, + 1, blkflags | flags); } int diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 6fa05fb78189..aa46b75d75af 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1503,7 +1503,8 @@ xfs_reflink_unshare( if (error) goto out; - error = filemap_write_and_wait_range(inode->i_mapping, offset, len); + error = filemap_write_and_wait_range(inode->i_mapping, offset, + offset + len - 1); if (error) goto out; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 6a7724a3560a..8c0c2df29d1c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -199,10 +199,12 @@ xfs_fs_show_options( seq_printf(m, ",swidth=%d", (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) - seq_puts(m, ",usrquota"); - else if (mp->m_qflags & XFS_UQUOTA_ACCT) - seq_puts(m, ",uqnoenforce"); + if (mp->m_qflags & XFS_UQUOTA_ACCT) { + if (mp->m_qflags & XFS_UQUOTA_ENFD) + seq_puts(m, ",usrquota"); + else + seq_puts(m, ",uqnoenforce"); + } if (mp->m_qflags & XFS_PQUOTA_ACCT) { if (mp->m_qflags & XFS_PQUOTA_ENFD) @@ -1153,6 +1155,22 @@ suffix_kstrtoint( return ret; } +static inline void +xfs_fs_warn_deprecated( + struct fs_context *fc, + struct fs_parameter *param, + uint64_t flag, + bool value) +{ + /* Don't print the warning if reconfiguring and current mount point + * already had the flag set + */ + if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && + !!(XFS_M(fc->root->d_sb)->m_flags & flag) == value) + return; + xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); +} + /* * Set mount state from a mount option. * @@ -1163,7 +1181,7 @@ xfs_fc_parse_param( struct fs_context *fc, struct fs_parameter *param) { - struct xfs_mount *mp = fc->s_fs_info; + struct xfs_mount *parsing_mp = fc->s_fs_info; struct fs_parse_result result; int size = 0; int opt; @@ -1174,142 +1192,142 @@ xfs_fc_parse_param( switch (opt) { case Opt_logbufs: - mp->m_logbufs = result.uint_32; + parsing_mp->m_logbufs = result.uint_32; return 0; case Opt_logbsize: - if (suffix_kstrtoint(param->string, 10, &mp->m_logbsize)) + if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize)) return -EINVAL; return 0; case Opt_logdev: - kfree(mp->m_logname); - mp->m_logname = kstrdup(param->string, GFP_KERNEL); - if (!mp->m_logname) + kfree(parsing_mp->m_logname); + parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL); + if (!parsing_mp->m_logname) return -ENOMEM; return 0; case Opt_rtdev: - kfree(mp->m_rtname); - mp->m_rtname = kstrdup(param->string, GFP_KERNEL); - if (!mp->m_rtname) + kfree(parsing_mp->m_rtname); + parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL); + if (!parsing_mp->m_rtname) return -ENOMEM; return 0; case Opt_allocsize: if (suffix_kstrtoint(param->string, 10, &size)) return -EINVAL; - mp->m_allocsize_log = ffs(size) - 1; - mp->m_flags |= XFS_MOUNT_ALLOCSIZE; + parsing_mp->m_allocsize_log = ffs(size) - 1; + parsing_mp->m_flags |= XFS_MOUNT_ALLOCSIZE; return 0; case Opt_grpid: case Opt_bsdgroups: - mp->m_flags |= XFS_MOUNT_GRPID; + parsing_mp->m_flags |= XFS_MOUNT_GRPID; return 0; case Opt_nogrpid: case Opt_sysvgroups: - mp->m_flags &= ~XFS_MOUNT_GRPID; + parsing_mp->m_flags &= ~XFS_MOUNT_GRPID; return 0; case Opt_wsync: - mp->m_flags |= XFS_MOUNT_WSYNC; + parsing_mp->m_flags |= XFS_MOUNT_WSYNC; return 0; case Opt_norecovery: - mp->m_flags |= XFS_MOUNT_NORECOVERY; + parsing_mp->m_flags |= XFS_MOUNT_NORECOVERY; return 0; case Opt_noalign: - mp->m_flags |= XFS_MOUNT_NOALIGN; + parsing_mp->m_flags |= XFS_MOUNT_NOALIGN; return 0; case Opt_swalloc: - mp->m_flags |= XFS_MOUNT_SWALLOC; + parsing_mp->m_flags |= XFS_MOUNT_SWALLOC; return 0; case Opt_sunit: - mp->m_dalign = result.uint_32; + parsing_mp->m_dalign = result.uint_32; return 0; case Opt_swidth: - mp->m_swidth = result.uint_32; + parsing_mp->m_swidth = result.uint_32; return 0; case Opt_inode32: - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + parsing_mp->m_flags |= XFS_MOUNT_SMALL_INUMS; return 0; case Opt_inode64: - mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; + parsing_mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; return 0; case Opt_nouuid: - mp->m_flags |= XFS_MOUNT_NOUUID; + parsing_mp->m_flags |= XFS_MOUNT_NOUUID; return 0; case Opt_largeio: - mp->m_flags |= XFS_MOUNT_LARGEIO; + parsing_mp->m_flags |= XFS_MOUNT_LARGEIO; return 0; case Opt_nolargeio: - mp->m_flags &= ~XFS_MOUNT_LARGEIO; + parsing_mp->m_flags &= ~XFS_MOUNT_LARGEIO; return 0; case Opt_filestreams: - mp->m_flags |= XFS_MOUNT_FILESTREAMS; + parsing_mp->m_flags |= XFS_MOUNT_FILESTREAMS; return 0; case Opt_noquota: - mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; - mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; - mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; + parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; + parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; + parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; return 0; case Opt_quota: case Opt_uquota: case Opt_usrquota: - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | XFS_UQUOTA_ENFD); return 0; case Opt_qnoenforce: case Opt_uqnoenforce: - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_UQUOTA_ENFD; + parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD; return 0; case Opt_pquota: case Opt_prjquota: - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | XFS_PQUOTA_ENFD); return 0; case Opt_pqnoenforce: - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_PQUOTA_ENFD; + parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD; return 0; case Opt_gquota: case Opt_grpquota: - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | XFS_GQUOTA_ENFD); return 0; case Opt_gqnoenforce: - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_GQUOTA_ENFD; + parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD; return 0; case Opt_discard: - mp->m_flags |= XFS_MOUNT_DISCARD; + parsing_mp->m_flags |= XFS_MOUNT_DISCARD; return 0; case Opt_nodiscard: - mp->m_flags &= ~XFS_MOUNT_DISCARD; + parsing_mp->m_flags &= ~XFS_MOUNT_DISCARD; return 0; #ifdef CONFIG_FS_DAX case Opt_dax: - xfs_mount_set_dax_mode(mp, XFS_DAX_ALWAYS); + xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS); return 0; case Opt_dax_enum: - xfs_mount_set_dax_mode(mp, result.uint_32); + xfs_mount_set_dax_mode(parsing_mp, result.uint_32); return 0; #endif /* Following mount options will be removed in September 2025 */ case Opt_ikeep: - xfs_warn(mp, "%s mount option is deprecated.", param->key); - mp->m_flags |= XFS_MOUNT_IKEEP; + xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, true); + parsing_mp->m_flags |= XFS_MOUNT_IKEEP; return 0; case Opt_noikeep: - xfs_warn(mp, "%s mount option is deprecated.", param->key); - mp->m_flags &= ~XFS_MOUNT_IKEEP; + xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, false); + parsing_mp->m_flags &= ~XFS_MOUNT_IKEEP; return 0; case Opt_attr2: - xfs_warn(mp, "%s mount option is deprecated.", param->key); - mp->m_flags |= XFS_MOUNT_ATTR2; + xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_ATTR2, true); + parsing_mp->m_flags |= XFS_MOUNT_ATTR2; return 0; case Opt_noattr2: - xfs_warn(mp, "%s mount option is deprecated.", param->key); - mp->m_flags &= ~XFS_MOUNT_ATTR2; - mp->m_flags |= XFS_MOUNT_NOATTR2; + xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_NOATTR2, true); + parsing_mp->m_flags &= ~XFS_MOUNT_ATTR2; + parsing_mp->m_flags |= XFS_MOUNT_NOATTR2; return 0; default: - xfs_warn(mp, "unknown mount option [%s].", param->key); + xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); return -EINVAL; } @@ -1693,7 +1711,10 @@ static int xfs_remount_ro( struct xfs_mount *mp) { - int error; + struct xfs_eofblocks eofb = { + .eof_flags = XFS_EOF_FLAGS_SYNC, + }; + int error; /* * Cancel background eofb scanning so it cannot race with the final @@ -1701,8 +1722,13 @@ xfs_remount_ro( */ xfs_stop_block_reaping(mp); - /* Get rid of any leftover CoW reservations... */ - error = xfs_icache_free_cowblocks(mp, NULL); + /* + * Clear out all remaining COW staging extents and speculative post-EOF + * preallocations so that we don't leave inodes requiring inactivation + * cleanups during reclaim on a read-only mount. We must process every + * cached inode, so this requires a synchronous cache scan. + */ + error = xfs_icache_free_cowblocks(mp, &eofb); if (error) { xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; @@ -1908,7 +1934,7 @@ xfs_init_zones(void) if (!xfs_ifork_zone) goto out_destroy_da_state_zone; - xfs_trans_zone = kmem_cache_create("xf_trans", + xfs_trans_zone = kmem_cache_create("xfs_trans", sizeof(struct xfs_trans), 0, 0, NULL); if (!xfs_trans_zone) diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 8e88a7ca387e..8d3abf06c54f 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -300,6 +300,7 @@ xfs_symlink( } ASSERT(pathlen == 0); } + i_size_write(VFS_I(ip), ip->i_d.di_size); /* * Create the directory entry for the symlink. diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c94e71f741b6..73a1de7ceefc 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -68,6 +68,7 @@ xfs_trans_free( xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); trace_xfs_trans_free(tp, _RET_IP_); + xfs_trans_clear_context(tp); if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); @@ -119,7 +120,8 @@ xfs_trans_dup( ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; - ntp->t_pflags = tp->t_pflags; + + xfs_trans_switch_context(tp, ntp); /* move deferred ops over to the new tp */ xfs_defer_move(ntp, tp); @@ -153,9 +155,6 @@ xfs_trans_reserve( int error = 0; bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; - /* Mark this thread as being in a transaction */ - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - /* * Attempt to reserve the needed disk blocks by decrementing * the number needed from the number available. This will @@ -163,10 +162,8 @@ xfs_trans_reserve( */ if (blocks > 0) { error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); - if (error != 0) { - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + if (error != 0) return -ENOSPC; - } tp->t_blk_res += blocks; } @@ -240,9 +237,6 @@ xfs_trans_reserve( xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); tp->t_blk_res = 0; } - - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - return error; } @@ -266,6 +260,7 @@ xfs_trans_alloc( tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); + xfs_trans_set_context(tp); /* * Zero-reservation ("empty") transactions can't modify anything, so @@ -620,6 +615,9 @@ xfs_trans_unreserve_and_mod_sb( /* apply remaining deltas */ spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_fdblocks += tp->t_fdblocks_delta + tp->t_res_fdblocks_delta; + mp->m_sb.sb_icount += idelta; + mp->m_sb.sb_ifree += ifreedelta; mp->m_sb.sb_frextents += rtxdelta; mp->m_sb.sb_dblocks += tp->t_dblocks_delta; mp->m_sb.sb_agcount += tp->t_agcount_delta; @@ -834,7 +832,7 @@ __xfs_trans_commit( bool regrant) { struct xfs_mount *mp = tp->t_mountp; - xfs_lsn_t commit_lsn = -1; + xfs_csn_t commit_seq = 0; int error = 0; int sync = tp->t_flags & XFS_TRANS_SYNC; @@ -876,9 +874,8 @@ __xfs_trans_commit( xfs_trans_apply_sb_deltas(tp); xfs_trans_apply_dquot_deltas(tp); - xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); + xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant); - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free(tp); /* @@ -886,7 +883,7 @@ __xfs_trans_commit( * log out now and wait for it. */ if (sync) { - error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); + error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL); XFS_STATS_INC(mp, xs_trans_sync); } else { XFS_STATS_INC(mp, xs_trans_async); @@ -910,7 +907,6 @@ __xfs_trans_commit( xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); tp->t_ticket = NULL; } - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free_items(tp, !!error); xfs_trans_free(tp); @@ -970,9 +966,6 @@ xfs_trans_cancel( tp->t_ticket = NULL; } - /* mark this thread as no longer being in a transaction */ - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - xfs_trans_free_items(tp, dirty); xfs_trans_free(tp); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 084658946cc8..97485559008b 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -43,7 +43,7 @@ struct xfs_log_item { struct list_head li_cil; /* CIL pointers */ struct xfs_log_vec *li_lv; /* active log vector */ struct xfs_log_vec *li_lv_shadow; /* standby vector */ - xfs_lsn_t li_seq; /* CIL commit seq */ + xfs_csn_t li_seq; /* CIL commit seq */ }; /* @@ -69,7 +69,7 @@ struct xfs_item_ops { void (*iop_pin)(struct xfs_log_item *); void (*iop_unpin)(struct xfs_log_item *, int remove); uint (*iop_push)(struct xfs_log_item *, struct list_head *); - void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn); + void (*iop_committing)(struct xfs_log_item *lip, xfs_csn_t seq); void (*iop_release)(struct xfs_log_item *); xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t); int (*iop_recover)(struct xfs_log_item *lip, @@ -268,4 +268,34 @@ xfs_trans_item_relog( return lip->li_ops->iop_relog(lip, tp); } +static inline void +xfs_trans_set_context( + struct xfs_trans *tp) +{ + ASSERT(current->journal_info == NULL); + tp->t_pflags = memalloc_nofs_save(); + current->journal_info = tp; +} + +static inline void +xfs_trans_clear_context( + struct xfs_trans *tp) +{ + if (current->journal_info == tp) { + memalloc_nofs_restore(tp->t_pflags); + current->journal_info = NULL; + } +} + +static inline void +xfs_trans_switch_context( + struct xfs_trans *old_tp, + struct xfs_trans *new_tp) +{ + ASSERT(current->journal_info == old_tp); + new_tp->t_pflags = old_tp->t_pflags; + old_tp->t_pflags = 0; + current->journal_info = new_tp; +} + #endif /* __XFS_TRANS_H__ */ diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 8b275e2056ba..6b87aeaa7179 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -68,15 +68,49 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize) zi->i_flags &= ~ZONEFS_ZONE_OPEN; } -static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - unsigned int flags, struct iomap *iomap, - struct iomap *srcmap) +static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, + loff_t length, unsigned int flags, + struct iomap *iomap, struct iomap *srcmap) { struct zonefs_inode_info *zi = ZONEFS_I(inode); struct super_block *sb = inode->i_sb; loff_t isize; - /* All I/Os should always be within the file maximum size */ + /* + * All blocks are always mapped below EOF. If reading past EOF, + * act as if there is a hole up to the file maximum size. + */ + mutex_lock(&zi->i_truncate_mutex); + iomap->bdev = inode->i_sb->s_bdev; + iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); + isize = i_size_read(inode); + if (iomap->offset >= isize) { + iomap->type = IOMAP_HOLE; + iomap->addr = IOMAP_NULL_ADDR; + iomap->length = length; + } else { + iomap->type = IOMAP_MAPPED; + iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; + iomap->length = isize - iomap->offset; + } + mutex_unlock(&zi->i_truncate_mutex); + + return 0; +} + +static const struct iomap_ops zonefs_read_iomap_ops = { + .iomap_begin = zonefs_read_iomap_begin, +}; + +static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, + loff_t length, unsigned int flags, + struct iomap *iomap, struct iomap *srcmap) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + + /* All write I/Os should always be within the file maximum size */ if (WARN_ON_ONCE(offset + length > zi->i_max_size)) return -EIO; @@ -86,7 +120,7 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, * operation. */ if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && - (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT))) + !(flags & IOMAP_DIRECT))) return -EIO; /* @@ -95,45 +129,42 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, * write pointer) and unwriten beyond. */ mutex_lock(&zi->i_truncate_mutex); - isize = i_size_read(inode); - if (offset >= isize) - iomap->type = IOMAP_UNWRITTEN; - else - iomap->type = IOMAP_MAPPED; - if (flags & IOMAP_WRITE) - length = zi->i_max_size - offset; - else - length = min(length, isize - offset); - mutex_unlock(&zi->i_truncate_mutex); - - iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); - iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset; iomap->bdev = inode->i_sb->s_bdev; + iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; + isize = i_size_read(inode); + if (iomap->offset >= isize) { + iomap->type = IOMAP_UNWRITTEN; + iomap->length = zi->i_max_size - iomap->offset; + } else { + iomap->type = IOMAP_MAPPED; + iomap->length = isize - iomap->offset; + } + mutex_unlock(&zi->i_truncate_mutex); return 0; } -static const struct iomap_ops zonefs_iomap_ops = { - .iomap_begin = zonefs_iomap_begin, +static const struct iomap_ops zonefs_write_iomap_ops = { + .iomap_begin = zonefs_write_iomap_begin, }; static int zonefs_readpage(struct file *unused, struct page *page) { - return iomap_readpage(page, &zonefs_iomap_ops); + return iomap_readpage(page, &zonefs_read_iomap_ops); } static void zonefs_readahead(struct readahead_control *rac) { - iomap_readahead(rac, &zonefs_iomap_ops); + iomap_readahead(rac, &zonefs_read_iomap_ops); } /* * Map blocks for page writeback. This is used only on conventional zone files, * which implies that the page range can only be within the fixed inode size. */ -static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, - struct inode *inode, loff_t offset) +static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, + struct inode *inode, loff_t offset) { struct zonefs_inode_info *zi = ZONEFS_I(inode); @@ -147,12 +178,12 @@ static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, offset < wpc->iomap.offset + wpc->iomap.length) return 0; - return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset, - IOMAP_WRITE, &wpc->iomap, NULL); + return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset, + IOMAP_WRITE, &wpc->iomap, NULL); } static const struct iomap_writeback_ops zonefs_writeback_ops = { - .map_blocks = zonefs_map_blocks, + .map_blocks = zonefs_write_map_blocks, }; static int zonefs_writepage(struct page *page, struct writeback_control *wbc) @@ -182,7 +213,8 @@ static int zonefs_swap_activate(struct swap_info_struct *sis, return -EINVAL; } - return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops); + return iomap_swapfile_activate(sis, swap_file, span, + &zonefs_read_iomap_ops); } static const struct address_space_operations zonefs_file_aops = { @@ -612,7 +644,7 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) /* Serialize against truncates */ down_read(&zi->i_mmap_sem); - ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); + ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops); up_read(&zi->i_mmap_sem); sb_end_pagefault(inode->i_sb); @@ -869,7 +901,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) if (append) ret = zonefs_file_dio_append(iocb, from); else - ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, + ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, &zonefs_write_dio_ops, sync); if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && (ret > 0 || ret == -EIOCBQUEUED)) { @@ -911,7 +943,7 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, if (ret <= 0) goto inode_unlock; - ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops); + ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops); if (ret > 0) iocb->ki_pos += ret; else if (ret == -EIO) @@ -1004,7 +1036,7 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) goto inode_unlock; } file_accessed(iocb->ki_filp); - ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, + ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops, &zonefs_read_dio_ops, is_sync_kiocb(iocb)); } else { ret = generic_file_read_iter(iocb, to); @@ -1706,11 +1738,6 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev); atomic_set(&sbi->s_open_zones, 0); - if (!sbi->s_max_open_zones && - sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { - zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n"); - sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; - } ret = zonefs_read_super(sb); if (ret) @@ -1729,6 +1756,12 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) zonefs_info(sb, "Mounting %u zones", blkdev_nr_zones(sb->s_bdev->bd_disk)); + if (!sbi->s_max_open_zones && + sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { + zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n"); + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; + } + /* Create root directory inode */ ret = -ENOMEM; inode = new_inode(sb); diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h index bc3fb59442ce..4e30e1799e61 100644 --- a/include/crypto/blake2s.h +++ b/include/crypto/blake2s.h @@ -101,7 +101,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key, blake2s_final(&state, out); } -void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, - const size_t keylen); - #endif /* _CRYPTO_BLAKE2S_H */ diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index dabaee698718..b3ea73b81944 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -47,12 +47,19 @@ static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) hchacha_block_generic(state, out, nrounds); } +enum chacha_constants { /* expand 32-byte k */ + CHACHA_CONSTANT_EXPA = 0x61707865U, + CHACHA_CONSTANT_ND_3 = 0x3320646eU, + CHACHA_CONSTANT_2_BY = 0x79622d32U, + CHACHA_CONSTANT_TE_K = 0x6b206574U +}; + static inline void chacha_init_consts(u32 *state) { - state[0] = 0x61707865; /* "expa" */ - state[1] = 0x3320646e; /* "nd 3" */ - state[2] = 0x79622d32; /* "2-by" */ - state[3] = 0x6b206574; /* "te k" */ + state[0] = CHACHA_CONSTANT_EXPA; + state[1] = CHACHA_CONSTANT_ND_3; + state[2] = CHACHA_CONSTANT_2_BY; + state[3] = CHACHA_CONSTANT_TE_K; } void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv); diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index c4165126937e..a6c3b8e7deb6 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -105,6 +105,12 @@ struct drbg_test_data { struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ }; +enum drbg_seed_state { + DRBG_SEED_STATE_UNSEEDED, + DRBG_SEED_STATE_PARTIAL, /* Seeded with !rng_is_initialized() */ + DRBG_SEED_STATE_FULL, +}; + struct drbg_state { struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ @@ -127,16 +133,14 @@ struct drbg_state { struct crypto_wait ctr_wait; /* CTR mode async wait obj */ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ - bool seeded; /* DRBG fully seeded? */ + enum drbg_seed_state seeded; /* DRBG fully seeded? */ bool pr; /* Prediction resistance enabled? */ bool fips_primed; /* Continuous test primed? */ unsigned char *prev; /* FIPS 140-2 continuous test value */ - struct work_struct seed_work; /* asynchronous seeding support */ struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; struct drbg_string test_data; - struct random_ready_callback random_ready; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 8e50d487500f..52363eee2b20 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -11,11 +11,11 @@ #include #include -void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, +void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, size_t nblocks, const u32 inc); -void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, - size_t nblocks, const u32 inc); +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc); bool blake2s_selftest(void); @@ -24,14 +24,11 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state) state->f[0] = -1; } -typedef void (*blake2s_compress_t)(struct blake2s_state *state, - const u8 *block, size_t nblocks, u32 inc); - /* Helper functions for BLAKE2s shared by the library and shash APIs */ -static inline void __blake2s_update(struct blake2s_state *state, - const u8 *in, size_t inlen, - blake2s_compress_t compress) +static __always_inline void +__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, + bool force_generic) { const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; @@ -39,7 +36,12 @@ static inline void __blake2s_update(struct blake2s_state *state, return; if (inlen > fill) { memcpy(state->buf + state->buflen, in, fill); - (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); state->buflen = 0; in += fill; inlen -= fill; @@ -47,7 +49,12 @@ static inline void __blake2s_update(struct blake2s_state *state, if (inlen > BLAKE2S_BLOCK_SIZE) { const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); /* Hash one less (full) block than strictly possible */ - (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); } @@ -55,13 +62,16 @@ static inline void __blake2s_update(struct blake2s_state *state, state->buflen += inlen; } -static inline void __blake2s_final(struct blake2s_state *state, u8 *out, - blake2s_compress_t compress) +static __always_inline void +__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic) { blake2s_set_lastblock(state); memset(state->buf + state->buflen, 0, BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - (*compress)(state, state->buf, 1, state->buflen); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, state->buflen); + else + blake2s_compress(state, state->buf, 1, state->buflen); cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); memcpy(out, state->h, state->outlen); } @@ -99,20 +109,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc) static inline int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, unsigned int inlen, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_update(state, in, inlen, compress); + __blake2s_update(state, in, inlen, force_generic); return 0; } static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_final(state, out, compress); + __blake2s_final(state, out, force_generic); return 0; } diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 4e035aca6f7e..6093fa6db260 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -41,6 +41,22 @@ #define __bf_shf(x) (__builtin_ffsll(x) - 1) +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ ({ \ BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ @@ -49,7 +65,8 @@ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ _pfx "value too large for the field"); \ - BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ _pfx "type of reg too small for mask"); \ __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ (1ULL << __bf_shf(_mask))); \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f33c425cdb71..c24d077c7a1b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1472,6 +1472,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1686,6 +1689,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline void bpf_map_put(struct bpf_map *map) { } @@ -1745,6 +1755,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, struct net_device *netdev); bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev); +void unpriv_ebpf_notify(int new_state); + #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 83fa08a06507..787fff5ec7f5 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -287,6 +287,9 @@ struct ceph_osd_linger_request { rados_watcherrcb_t errcb; void *data; + struct ceph_pagelist *request_pl; + struct page **notify_id_pages; + struct page ***preply_pages; size_t *preply_len; }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 22ae91f8165f..b62f893fc0f1 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -65,6 +65,11 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev, extern ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, + char *buf); +extern ssize_t cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/dim.h b/include/linux/dim.h index b698266d0035..6c5733981563 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -21,7 +21,7 @@ * We consider 10% difference as significant. */ #define IS_SIGNIFICANT_DIFF(val, ref) \ - (((100UL * abs((val) - (ref))) / (ref)) > 10) + ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10)) /* * Calculate the gap between two values. diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 03befff63eda..393aa55f5452 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -61,14 +61,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * This is a hint to the DMA-mapping subsystem that the device is expected - * to overwrite the entire mapped size, thus the caller does not require any - * of the previous buffer contents to be preserved. This allows - * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. - */ -#define DMA_ATTR_OVERWRITE (1UL << 10) - /* * DMA_ATTR_SYS_CACHE_ONLY: used to indicate that the buffer should be mapped * with the correct memory attributes so that it can be cached in the system diff --git a/include/linux/efi.h b/include/linux/efi.h index e17cd4c44f93..3bac68fb7ff1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -167,6 +167,8 @@ struct capsule_info { size_t page_bytes_remain; }; +int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, + size_t hdr_bytes); int __efi_capsule_setup_info(struct capsule_info *cap_info); typedef int (*efi_freemem_callback_t) (u64 start, u64 end, void *arg); diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index ff5596dd30f8..2382dec6d6ab 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -15,6 +15,8 @@ void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); void fbcon_fb_blanked(struct fb_info *info, int blank); +int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var); void fbcon_update_vcs(struct fb_info *info, bool all); void fbcon_remap_all(struct fb_info *info); int fbcon_set_con2fb_map_ioctl(void __user *argp); @@ -33,6 +35,8 @@ static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} +static inline int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var) { return 0; } static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} static inline void fbcon_remap_all(struct fb_info *info) {} static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; } diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8e6dd908da21..aa1d4da03538 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); -/** Feed random bits into the pool. */ -extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 33e939977444..c16a9dda3ad5 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -228,6 +228,7 @@ struct st_sensor_settings { * @hw_irq_trigger: if we're using the hardware interrupt on the sensor. * @hw_timestamp: Latest timestamp from the interrupt handler, when in use. * @buffer_data: Data used by buffer part. + * @odr_lock: Local lock for preventing concurrent ODR accesses/changes */ struct st_sensor_data { struct device *dev; @@ -253,6 +254,8 @@ struct st_sensor_data { s64 hw_timestamp; char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; + + struct mutex odr_lock; }; #ifdef CONFIG_IIO_BUFFER diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 6175b3e0d220..e203fdb020c4 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -270,9 +270,9 @@ extern void static_key_disable_cpuslocked(struct static_key *key); #include #include -static inline int static_key_count(struct static_key *key) +static __always_inline int static_key_count(struct static_key *key) { - return atomic_read(&key->enabled); + return arch_atomic_read(&key->enabled); } static __always_inline void jump_label_init(void) diff --git a/include/linux/kd.h b/include/linux/kd.h deleted file mode 100644 index b130a18f860f..000000000000 --- a/include/linux/kd.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_KD_H -#define _LINUX_KD_H - -#include - -#define KD_FONT_FLAG_OLD 0x80000000 /* Invoked via old interface [compat] */ -#endif /* _LINUX_KD_H */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 5f61389f5f36..a1f12e959bba 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -187,14 +187,6 @@ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len); void *arch_kexec_kernel_image_load(struct kimage *image); -int arch_kexec_apply_relocations_add(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab); -int arch_kexec_apply_relocations(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab); int arch_kimage_file_post_load_cleanup(struct kimage *image); #ifdef CONFIG_KEXEC_SIG int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, @@ -223,6 +215,44 @@ extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mend); extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, void **addr, unsigned long *sz); + +#ifndef arch_kexec_apply_relocations_add +/* + * arch_kexec_apply_relocations_add - apply relocations of type RELA + * @pi: Purgatory to be relocated. + * @section: Section relocations applying to. + * @relsec: Section containing RELAs. + * @symtab: Corresponding symtab. + * + * Return: 0 on success, negative errno on error. + */ +static inline int +arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, + const Elf_Shdr *relsec, const Elf_Shdr *symtab) +{ + pr_err("RELA relocation unsupported.\n"); + return -ENOEXEC; +} +#endif + +#ifndef arch_kexec_apply_relocations +/* + * arch_kexec_apply_relocations - apply relocations of type REL + * @pi: Purgatory to be relocated. + * @section: Section relocations applying to. + * @relsec: Section containing RELs. + * @symtab: Corresponding symtab. + * + * Return: 0 on success, negative errno on error. + */ +static inline int +arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section, + const Elf_Shdr *relsec, const Elf_Shdr *symtab) +{ + pr_err("REL relocation unsupported.\n"); + return -ENOEXEC; +} +#endif #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_KEXEC_ELF @@ -412,6 +442,12 @@ static inline int kexec_crash_loaded(void) { return 0; } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_KEXEC_SIG +void set_kexec_sig_enforced(void); +#else +static inline void set_kexec_sig_enforced(void) {} +#endif + #endif /* !defined(__ASSEBMLY__) */ #endif /* LINUX_KEXEC_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c66c702a4f07..439fbe0ee0c7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -988,7 +988,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) { } -static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) { return false; } diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 37dc5594081d..4bd58de49222 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -313,7 +313,7 @@ LSM_HOOK(int, 0, secmark_relabel_packet, u32 secid) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, - struct flowi *fl) + struct flowi_common *flic) LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) @@ -353,7 +353,7 @@ LSM_HOOK(int, 0, xfrm_state_delete_security, struct xfrm_state *x) LSM_HOOK(int, 0, xfrm_policy_lookup, struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) LSM_HOOK(int, 1, xfrm_state_pol_flow_match, struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl) + struct xfrm_policy *xp, const struct flowi_common *flic) LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, int ckall) #endif /* CONFIG_SECURITY_NETWORK_XFRM */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0ad8aa657914..613928e8a9ea 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1114,7 +1114,7 @@ * @xfrm_state_pol_flow_match: * @x contains the state to match. * @xp contains the policy to check for a match. - * @fl contains the flow to check for a match. + * @flic contains the flowi_common struct to check for a match. * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 0661af17a758..1e0205811394 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -86,6 +86,8 @@ struct cmos_rtc_board_info { /* 2 values for divider stage reset, others for "testing purposes only" */ # define RTC_DIV_RESET1 0x60 # define RTC_DIV_RESET2 0x70 + /* In AMD BKDG bit 5 and 6 are reserved, bit 4 is for select dv0 bank */ +# define RTC_AMD_BANK_SELECT 0x10 /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */ # define RTC_RATE_SELECT 0x0F diff --git a/include/linux/memregion.h b/include/linux/memregion.h index e11595256cac..c04c4fd2e209 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -16,7 +16,7 @@ static inline int memregion_alloc(gfp_t gfp) { return -ENOMEM; } -void memregion_free(int id) +static inline void memregion_free(int id) { } #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index eba1f1cbc9fb..6ca97729b54a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4877,12 +4877,11 @@ struct mlx5_ifc_query_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x20]; - u8 ece[0x20]; + u8 reserved_at_40[0x40]; u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; diff --git a/include/linux/mm.h b/include/linux/mm.h index 420a7706641e..1a64ab7ba67a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2735,6 +2735,7 @@ extern int install_special_mapping(struct mm_struct *mm, unsigned long flags, struct page **pages); unsigned long randomize_stack_top(unsigned long stack_top); +unsigned long randomize_page(unsigned long start, unsigned long range); extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index fd1ecb821106..d88bb56c18e2 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -286,6 +286,7 @@ struct cfi_private { map_word sector_erase_cmd; unsigned long chipshift; /* Because they're of the same type */ const char *im_name; /* inter_module name for cmdset_setup */ + unsigned long quirks; struct flchip chips[]; /* per-chip data structure for each chip */ }; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index ac398e143c9a..2a63ef05a6cc 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -42,11 +42,11 @@ * void nodes_shift_right(dst, src, n) Shift right * void nodes_shift_left(dst, src, n) Shift left * - * int first_node(mask) Number lowest set bit, or MAX_NUMNODES - * int next_node(node, mask) Next node past 'node', or MAX_NUMNODES - * int next_node_in(node, mask) Next node past 'node', or wrap to first, + * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES + * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES + * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, * or MAX_NUMNODES - * int first_unset_node(mask) First node not set in mask, or + * unsigned int first_unset_node(mask) First node not set in mask, or * MAX_NUMNODES * * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set @@ -153,7 +153,7 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) -static inline int __node_test_and_set(int node, nodemask_t *addr) +static inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } @@ -200,7 +200,7 @@ static inline void __nodes_complement(nodemask_t *dstp, #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) -static inline int __nodes_equal(const nodemask_t *src1p, +static inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); @@ -208,7 +208,7 @@ static inline int __nodes_equal(const nodemask_t *src1p, #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) -static inline int __nodes_intersects(const nodemask_t *src1p, +static inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); @@ -216,20 +216,20 @@ static inline int __nodes_intersects(const nodemask_t *src1p, #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) -static inline int __nodes_subset(const nodemask_t *src1p, +static inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) -static inline int __nodes_empty(const nodemask_t *srcp, unsigned int nbits) +static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) -static inline int __nodes_full(const nodemask_t *srcp, unsigned int nbits) +static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } @@ -260,15 +260,15 @@ static inline void __nodes_shift_left(nodemask_t *dstp, > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) -static inline int __first_node(const nodemask_t *srcp) +static inline unsigned int __first_node(const nodemask_t *srcp) { - return min_t(int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); + return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) -static inline int __next_node(int n, const nodemask_t *srcp) +static inline unsigned int __next_node(int n, const nodemask_t *srcp) { - return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); + return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } /* @@ -276,7 +276,7 @@ static inline int __next_node(int n, const nodemask_t *srcp) * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) -int __next_node_in(int node, const nodemask_t *srcp); +unsigned int __next_node_in(int node, const nodemask_t *srcp); static inline void init_nodemask_of_node(nodemask_t *mask, int node) { @@ -296,9 +296,9 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node) }) #define first_unset_node(mask) __first_unset_node(&(mask)) -static inline int __first_unset_node(const nodemask_t *maskp) +static inline unsigned int __first_unset_node(const nodemask_t *maskp) { - return min_t(int,MAX_NUMNODES, + return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); } @@ -375,14 +375,13 @@ static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, } #if MAX_NUMNODES > 1 -#define for_each_node_mask(node, mask) \ - for ((node) = first_node(mask); \ - (node) < MAX_NUMNODES; \ - (node) = next_node((node), (mask))) +#define for_each_node_mask(node, mask) \ + for ((node) = first_node(mask); \ + (node >= 0) && (node) < MAX_NUMNODES; \ + (node) = next_node((node), (mask))) #else /* MAX_NUMNODES == 1 */ -#define for_each_node_mask(node, mask) \ - if (!nodes_empty(mask)) \ - for ((node) = 0; (node) < 1; (node)++) +#define for_each_node_mask(node, mask) \ + for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) #endif /* MAX_NUMNODES */ /* @@ -436,11 +435,11 @@ static inline int num_node_state(enum node_states state) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) -static inline int next_online_node(int nid) +static inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } -static inline int next_memory_node(int nid) +static inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 577f51436cf9..662f19374bd9 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -29,11 +29,19 @@ struct unwind_hint { * * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that * sp_reg+sp_offset points to the iret return frame. + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 -#define UNWIND_HINT_TYPE_RET_OFFSET 3 +#define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_STACK_VALIDATION @@ -96,7 +104,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -120,7 +128,7 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index 9d9c198be7d0..3abe5ddb5c03 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1622,6 +1622,13 @@ static inline bool pci_aer_available(void) { return false; } bool pci_ats_disabled(void); +#ifdef CONFIG_PCIE_PTM +int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); +#else +static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity) +{ return -EINVAL; } +#endif + void pci_cfg_access_lock(struct pci_dev *dev); bool pci_cfg_access_trylock(struct pci_dev *dev); void pci_cfg_access_unlock(struct pci_dev *dev); diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 02599687770c..7f03e02c48cd 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -216,6 +216,9 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, int cros_ec_check_result(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); +int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg); + int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 30091ab5de28..718600e83020 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,7 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); -extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle); +extern void pm_runtime_release_supplier(struct device_link *link); /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. @@ -280,8 +280,7 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} -static inline void pm_runtime_release_supplier(struct device_link *link, - bool check_idle) {} +static inline void pm_runtime_release_supplier(struct device_link *link) {} #endif /* !CONFIG_PM */ diff --git a/include/linux/prandom.h b/include/linux/prandom.h index 056d31317e49..a4aadd2dc153 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -10,6 +10,7 @@ #include #include +#include u32 prandom_u32(void); void prandom_bytes(void *buf, size_t nbytes); @@ -27,15 +28,10 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise); * The core SipHash round function. Each line can be executed in * parallel given enough CPU resources. */ -#define PRND_SIPROUND(v0, v1, v2, v3) ( \ - v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ - v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ - v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ - v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ -) +#define PRND_SIPROUND(v0, v1, v2, v3) SIPHASH_PERMUTATION(v0, v1, v2, v3) -#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) -#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) +#define PRND_K0 (SIPHASH_CONST_0 ^ SIPHASH_CONST_2) +#define PRND_K1 (SIPHASH_CONST_1 ^ SIPHASH_CONST_3) #elif BITS_PER_LONG == 32 /* @@ -43,14 +39,9 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise); * This is weaker, but 32-bit machines are not used for high-traffic * applications, so there is less output for an attacker to analyze. */ -#define PRND_SIPROUND(v0, v1, v2, v3) ( \ - v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ - v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ - v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ - v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ -) -#define PRND_K0 0x6c796765 -#define PRND_K1 0x74656462 +#define PRND_SIPROUND(v0, v1, v2, v3) HSIPHASH_PERMUTATION(v0, v1, v2, v3) +#define PRND_K0 (HSIPHASH_CONST_0 ^ HSIPHASH_CONST_2) +#define PRND_K1 (HSIPHASH_CONST_1 ^ HSIPHASH_CONST_3) #else #error Unsupported BITS_PER_LONG diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 2a9df80ea887..ae7dbdfa3d83 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 -#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ @@ -47,12 +46,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) -/* single stepping state bits (used on ARM and PA-RISC) */ -#define PT_SINGLESTEP_BIT 31 -#define PT_SINGLESTEP (1< -struct random_ready_callback { - struct list_head list; - void (*func)(struct random_ready_callback *rdy); - struct module *owner; -}; +struct notifier_block; -extern void add_device_randomness(const void *, unsigned int); -extern void add_bootloader_randomness(const void *, unsigned int); +void add_device_randomness(const void *buf, unsigned int len); +void add_bootloader_randomness(const void *buf, size_t len); +void add_input_randomness(unsigned int type, unsigned int code, + unsigned int value) __latent_entropy; +void add_interrupt_randomness(int irq) __latent_entropy; +void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) { - add_device_randomness((const void *)&latent_entropy, - sizeof(latent_entropy)); + add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); } #else -static inline void add_latent_entropy(void) {} -#endif - -extern void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) __latent_entropy; -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; - -extern void get_random_bytes(void *buf, int nbytes); -extern int wait_for_random_bytes(void); -extern int __init rand_initialize(void); -extern bool rng_is_initialized(void); -extern int add_random_ready_callback(struct random_ready_callback *rdy); -extern void del_random_ready_callback(struct random_ready_callback *rdy); -extern int __must_check get_random_bytes_arch(void *buf, int nbytes); - -#ifndef MODULE -extern const struct file_operations random_fops, urandom_fops; +static inline void add_latent_entropy(void) { } #endif +void get_random_bytes(void *buf, int len); +int __must_check get_random_bytes_arch(void *buf, int len); u32 get_random_u32(void); u64 get_random_u64(void); static inline unsigned int get_random_int(void) @@ -80,36 +61,38 @@ static inline unsigned long get_random_long(void) static inline unsigned long get_random_canary(void) { - unsigned long val = get_random_long(); - - return val & CANARY_MASK; + return get_random_long() & CANARY_MASK; } +int __init random_init(const char *command_line); +bool rng_is_initialized(void); +int wait_for_random_bytes(void); +int register_random_ready_notifier(struct notifier_block *nb); +int unregister_random_ready_notifier(struct notifier_block *nb); + /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. */ -static inline int get_random_bytes_wait(void *buf, int nbytes) +static inline int get_random_bytes_wait(void *buf, size_t nbytes) { int ret = wait_for_random_bytes(); get_random_bytes(buf, nbytes); return ret; } -#define declare_get_random_var_wait(var) \ - static inline int get_random_ ## var ## _wait(var *out) { \ +#define declare_get_random_var_wait(name, ret_type) \ + static inline int get_random_ ## name ## _wait(ret_type *out) { \ int ret = wait_for_random_bytes(); \ if (unlikely(ret)) \ return ret; \ - *out = get_random_ ## var(); \ + *out = get_random_ ## name(); \ return 0; \ } -declare_get_random_var_wait(u32) -declare_get_random_var_wait(u64) -declare_get_random_var_wait(int) -declare_get_random_var_wait(long) +declare_get_random_var_wait(u32, u32) +declare_get_random_var_wait(u64, u32) +declare_get_random_var_wait(int, unsigned int) +declare_get_random_var_wait(long, unsigned long) #undef declare_get_random_var -unsigned long randomize_page(unsigned long start, unsigned long range); - /* * This is designed to be standalone for just prandom * users, but for now we include it from @@ -120,22 +103,10 @@ unsigned long randomize_page(unsigned long start, unsigned long range); #ifdef CONFIG_ARCH_RANDOM # include #else -static inline bool __must_check arch_get_random_long(unsigned long *v) -{ - return false; -} -static inline bool __must_check arch_get_random_int(unsigned int *v) -{ - return false; -} -static inline bool __must_check arch_get_random_seed_long(unsigned long *v) -{ - return false; -} -static inline bool __must_check arch_get_random_seed_int(unsigned int *v) -{ - return false; -} +static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } +static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; } +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; } +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; } #endif /* @@ -158,4 +129,26 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) } #endif +#ifdef CONFIG_SMP +int random_prepare_cpu(unsigned int cpu); +int random_online_cpu(unsigned int cpu); +#endif + +#ifndef MODULE +extern const struct file_operations random_fops, urandom_fops; +#endif + +/* + * Android KABI fixups + * Added back the following structure and calls to preserve the ABI for + * out-of-tree drivers that were using them. + */ +struct random_ready_callback { + struct list_head list; + void (*func)(struct random_ready_callback *rdy); + struct module *owner; +}; +extern int add_random_ready_callback(struct random_ready_callback *rdy); +extern void del_random_ready_callback(struct random_ready_callback *rdy); + #endif /* _LINUX_RANDOM_H */ diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index b676aa419eef..f0e535f199be 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -23,12 +23,16 @@ struct ratelimit_state { unsigned long flags; }; -#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ - .interval = interval_init, \ - .burst = burst_init, \ +#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + .flags = flags_init, \ } +#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ + RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0) + #define RATELIMIT_STATE_INIT_DISABLED \ RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index 159729cffd8e..3247ed8e9ff0 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -54,8 +54,6 @@ struct rtsx_ucr { struct usb_device *pusb_dev; struct usb_interface *pusb_intf; struct usb_sg_request current_sg; - unsigned char *iobuf; - dma_addr_t iobuf_dma; struct timer_list sg_timer; struct mutex dev_mutex; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index cea4bdfd0f05..df9505faed27 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -82,7 +82,7 @@ static inline void exit_thread(struct task_struct *tsk) extern void do_group_exit(int); extern void exit_files(struct task_struct *); -extern void exit_itimers(struct signal_struct *); +extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *fork_idle(int); diff --git a/include/linux/security.h b/include/linux/security.h index a87cbacab078..694db31a46e1 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -121,10 +121,12 @@ enum lockdown_reason { LOCKDOWN_DEBUGFS, LOCKDOWN_XMON_WR, LOCKDOWN_BPF_WRITE_USER, + LOCKDOWN_DBG_WRITE_KERNEL, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, LOCKDOWN_BPF_READ, + LOCKDOWN_DBG_READ_KERNEL, LOCKDOWN_PERF, LOCKDOWN_TRACEFS, LOCKDOWN_XMON_RW, @@ -166,7 +168,7 @@ struct sk_buff; struct sock; struct sockaddr; struct socket; -struct flowi; +struct flowi_common; struct dst_entry; struct xfrm_selector; struct xfrm_policy; @@ -1369,8 +1371,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u int security_sk_alloc(struct sock *sk, int family, gfp_t priority); void security_sk_free(struct sock *sk); void security_sk_clone(const struct sock *sk, struct sock *newsk); -void security_sk_classify_flow(struct sock *sk, struct flowi *fl); -void security_req_classify_flow(const struct request_sock *req, struct flowi *fl); +void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic); +void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic); void security_sock_graft(struct sock*sk, struct socket *parent); int security_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -1521,11 +1524,13 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) +static inline void security_sk_classify_flow(struct sock *sk, + struct flowi_common *flic) { } -static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +static inline void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic) { } @@ -1652,9 +1657,9 @@ void security_xfrm_state_free(struct xfrm_state *x); int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl); + const struct flowi_common *flic); int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid); -void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); +void security_skb_classify_flow(struct sk_buff *skb, struct flowi_common *flic); #else /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -1706,7 +1711,8 @@ static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_s } static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl) + struct xfrm_policy *xp, + const struct flowi_common *flic) { return 1; } @@ -1716,7 +1722,8 @@ static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) return 0; } -static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +static inline void security_skb_classify_flow(struct sk_buff *skb, + struct flowi_common *flic) { } diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 52004d800c93..60558ae1b7e5 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -403,6 +403,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif +static inline bool uart_console_enabled(struct uart_port *port) +{ + return uart_console(port) && (port->cons->flags & CON_ENABLED); +} + struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, diff --git a/include/linux/siphash.h b/include/linux/siphash.h index 0cda61855d90..0bb5ecd507be 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -136,4 +136,32 @@ static inline u32 hsiphash(const void *data, size_t len, return ___hsiphash_aligned(data, len, key); } +/* + * These macros expose the raw SipHash and HalfSipHash permutations. + * Do not use them directly! If you think you have a use for them, + * be sure to CC the maintainer of this file explaining why. + */ + +#define SIPHASH_PERMUTATION(a, b, c, d) ( \ + (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \ + (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \ + (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \ + (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32)) + +#define SIPHASH_CONST_0 0x736f6d6570736575ULL +#define SIPHASH_CONST_1 0x646f72616e646f6dULL +#define SIPHASH_CONST_2 0x6c7967656e657261ULL +#define SIPHASH_CONST_3 0x7465646279746573ULL + +#define HSIPHASH_PERMUTATION(a, b, c, d) ( \ + (a) += (b), (b) = rol32((b), 5), (b) ^= (a), (a) = rol32((a), 16), \ + (c) += (d), (d) = rol32((d), 8), (d) ^= (c), \ + (a) += (d), (d) = rol32((d), 7), (d) ^= (a), \ + (c) += (b), (b) = rol32((b), 13), (b) ^= (c), (c) = rol32((c), 16)) + +#define HSIPHASH_CONST_0 0U +#define HSIPHASH_CONST_1 0U +#define HSIPHASH_CONST_2 0x6c796765U +#define HSIPHASH_CONST_3 0x74656462U + #endif /* _LINUX_SIPHASH_H */ diff --git a/include/linux/timex.h b/include/linux/timex.h index ce0859763670..2efab9a806a9 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -62,6 +62,8 @@ #include #include +unsigned long random_get_entropy_fallback(void); + #include #ifndef random_get_entropy @@ -74,8 +76,14 @@ * * By default we use get_cycles() for this purpose, but individual * architectures may override this in their asm/timex.h header file. + * If a given arch does not have get_cycles(), then we fallback to + * using random_get_entropy_fallback(). */ -#define random_get_entropy() get_cycles() +#ifdef get_cycles +#define random_get_entropy() ((unsigned long)get_cycles()) +#else +#define random_get_entropy() random_get_entropy_fallback() +#endif #endif /* diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index 767f62086bd9..c326bfdb5ec2 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -12,7 +12,6 @@ extern int tty_insert_flip_string_fixed_flag(struct tty_port *port, extern int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, size_t size); extern void tty_flip_buffer_push(struct tty_port *port); -void tty_schedule_flip(struct tty_port *port); int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); static inline int tty_insert_flip_char(struct tty_port *port, @@ -40,4 +39,7 @@ static inline int tty_insert_flip_string(struct tty_port *port, extern void tty_buffer_lock_exclusive(struct tty_port *port); extern void tty_buffer_unlock_exclusive(struct tty_port *port); +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t cnt); + #endif /* _LINUX_TTY_FLIP_H */ diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 2ffafbdcda07..d14b39d1418e 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -125,6 +125,7 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ +#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -135,6 +136,7 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) +#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default diff --git a/include/net/addrconf.h b/include/net/addrconf.h index dd2e03d39f38..3e5375054eb8 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -417,6 +417,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); + if (unlikely(!idev)) + return true; + return !!idev->cnf.ignore_routes_with_linkdown; } diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 3fecc4a411a1..355835639ae5 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -422,6 +422,71 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, return NULL; } +/* Shall not be called with lock_sock held */ +static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, + struct msghdr *msg, + size_t len, size_t mtu, + size_t headroom, size_t tailroom) +{ + struct sk_buff *skb; + size_t size = min_t(size_t, len, mtu); + int err; + + skb = bt_skb_send_alloc(sk, size + headroom + tailroom, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(err); + + skb_reserve(skb, headroom); + skb_tailroom_reserve(skb, mtu, tailroom); + + if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) { + kfree_skb(skb); + return ERR_PTR(-EFAULT); + } + + skb->priority = sk->sk_priority; + + return skb; +} + +/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments + * accourding to the MTU. + */ +static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, + struct msghdr *msg, + size_t len, size_t mtu, + size_t headroom, size_t tailroom) +{ + struct sk_buff *skb, **frag; + + skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); + if (IS_ERR_OR_NULL(skb)) + return skb; + + len -= skb->len; + if (!len) + return skb; + + /* Add remaining data over MTU as continuation fragments */ + frag = &skb_shinfo(skb)->frag_list; + while (len) { + struct sk_buff *tmp; + + tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); + if (IS_ERR(tmp)) { + return skb; + } + + len -= tmp->len; + + *frag = tmp; + frag = &(*frag)->next; + } + + return skb; +} + int bt_to_errno(u16 code); void hci_sock_set_flag(struct sock *sk, int nr); diff --git a/include/net/flow.h b/include/net/flow.h index b2531df3f65f..39d0cedcddee 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -195,11 +195,21 @@ static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) return container_of(fl4, struct flowi, u.ip4); } +static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4) +{ + return &(flowi4_to_flowi(fl4)->u.__fl_common); +} + static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6) { return container_of(fl6, struct flowi, u.ip6); } +static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) +{ + return &(flowi6_to_flowi(fl6)->u.__fl_common); +} + static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) { return container_of(fldn, struct flowi, u.dn); diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 010d58159887..9a58274e6217 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -568,5 +568,6 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)); +bool flow_indr_dev_exists(void); #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 8bf5906073bc..e03ba8e80781 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,6 +64,14 @@ struct inet6_ifaddr { struct hlist_node addr_lst; struct list_head if_list; + /* + * Used to safely traverse idev->addr_list in process context + * if the idev->lock needed to protect idev->addr_list cannot be held. + * In that case, add the items to this list temporarily and iterate + * without holding idev->lock. + * See addrconf_ifdown and dev_forward_change. + */ + struct list_head if_list_aux; struct list_head tmp_list; struct inet6_ifaddr *ifpub; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 8177cb94958c..5f7ee70dd488 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -322,7 +322,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 3 +#define TCP_PINGPONG_THRESH 1 static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { @@ -339,14 +339,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk) return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; } -static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_ack.pingpong < U8_MAX) - icsk->icsk_ack.pingpong++; -} - static inline bool inet_csk_has_ulp(struct sock *sk) { return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ca6a3ea9057e..d4d611064a76 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -419,7 +419,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) } int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 89163ef8cf4b..3c039d4b0e48 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) + if (!sk->sk_mark && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; return sk->sk_mark; @@ -369,7 +370,7 @@ static inline bool inet_get_convert_csum(struct sock *sk) static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { - return net->ipv4.sysctl_ip_nonlocal_bind || + return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || inet->freebind || inet->transparent; } diff --git a/include/net/ip.h b/include/net/ip.h index db3a2eb144b3..d26bac5dbb3e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -360,7 +360,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { - return port < net->ipv4.sysctl_ip_prot_sock; + return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock); } #else @@ -392,7 +392,7 @@ void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ - ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { @@ -453,7 +453,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); unsigned int mtu; - if (net->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 09f2efea0b97..5805fe4947f3 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -59,8 +59,13 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) int ret = NF_ACCEPT; if (ct) { - if (!nf_ct_is_confirmed(ct)) + if (!nf_ct_is_confirmed(ct)) { ret = __nf_conntrack_confirm(skb); + + if (ret == NF_ACCEPT) + ct = (struct nf_conn *)skb_nfct(skb); + } + if (likely(ret == NF_ACCEPT)) nf_ct_deliver_cached_events(ct); } diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 76bfb6cd5815..b9948e7861f2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -203,11 +203,11 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); -int nft_validate_register_load(enum nft_registers reg, unsigned int len); -int nft_validate_register_store(const struct nft_ctx *ctx, - enum nft_registers reg, - const struct nft_data *data, - enum nft_data_types type, unsigned int len); +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); +int nft_parse_register_store(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *dreg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len); /** * struct nft_userdata - user defined data associated with an object @@ -1013,7 +1013,6 @@ struct nft_stats { struct nft_hook { struct list_head list; - bool inactive; struct nf_hook_ops ops; struct rcu_head rcu; }; diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 8657e6815b07..fd10a7862fdc 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -26,21 +26,21 @@ void nf_tables_core_module_exit(void); struct nft_bitwise_fast_expr { u32 mask; u32 xor; - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; }; struct nft_cmp_fast_expr { u32 data; u32 mask; - enum nft_registers sreg:8; + u8 sreg; u8 len; bool inv; }; struct nft_immediate_expr { struct nft_data data; - enum nft_registers dreg:8; + u8 dreg; u8 dlen; }; @@ -60,14 +60,14 @@ struct nft_payload { enum nft_payload_bases base:8; u8 offset; u8 len; - enum nft_registers dreg:8; + u8 dreg; }; struct nft_payload_set { enum nft_payload_bases base:8; u8 offset; u8 len; - enum nft_registers sreg:8; + u8 sreg; u8 csum_type; u8 csum_offset; u8 csum_flags; diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index 7a453a35a41d..1058f38e2aca 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -91,7 +91,7 @@ int nft_flow_rule_offload_commit(struct net *net); NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ memset(&(__reg)->mask, 0xff, (__reg)->len); -int nft_chain_offload_priority(struct nft_base_chain *basechain); +bool nft_chain_offload_support(const struct nft_base_chain *basechain); int nft_offload_init(void); void nft_offload_exit(void); diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 628b6fa579cd..237f3757637e 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -5,7 +5,7 @@ #include struct nft_fib { - enum nft_registers dreg:8; + u8 dreg; u8 result; u32 flags; }; diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 07e2fd507963..2dce55c736f4 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -7,8 +7,8 @@ struct nft_meta { enum nft_meta_keys key:8; union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 dreg; + u8 sreg; }; }; diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df594853..c51a635671a7 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/route.h b/include/net/route.h index a07c277cd33e..2551f3f03b37 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -165,7 +165,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } @@ -322,7 +322,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_rt_put(rt); flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); } - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } @@ -338,7 +338,7 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable flowi4_update_output(fl4, sk->sk_bound_dev_if, RT_CONN_FLAGS(sk), fl4->daddr, fl4->saddr); - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(sock_net(sk), fl4, sk); } return rt; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 20f676de14ce..e2c70fd90a46 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -166,37 +166,17 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) if (spin_trylock(&qdisc->seqlock)) goto nolock_empty; - /* Paired with smp_mb__after_atomic() to make sure - * STATE_MISSED checking is synchronized with clearing - * in pfifo_fast_dequeue(). + /* No need to insist if the MISSED flag was already set. + * Note that test_and_set_bit() also gives us memory ordering + * guarantees wrt potential earlier enqueue() and below + * spin_trylock(), both of which are necessary to prevent races */ - smp_mb__before_atomic(); - - /* If the MISSED flag is set, it means other thread has - * set the MISSED flag before second spin_trylock(), so - * we can return false here to avoid multi cpus doing - * the set_bit() and second spin_trylock() concurrently. - */ - if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) + if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state)) return false; - /* Set the MISSED flag before the second spin_trylock(), - * if the second spin_trylock() return false, it means - * other cpu holding the lock will do dequeuing for us - * or it will see the MISSED flag set after releasing - * lock and reschedule the net_tx_action() to do the - * dequeuing. - */ - set_bit(__QDISC_STATE_MISSED, &qdisc->state); - - /* spin_trylock() only has load-acquire semantic, so use - * smp_mb__after_atomic() to ensure STATE_MISSED is set - * before doing the second spin_trylock(). - */ - smp_mb__after_atomic(); - - /* Retry again in case other CPU may not see the new flag - * after it releases the lock at the end of qdisc_run_end(). + /* Try to take the lock again to make sure that we will either + * grab it or the CPU that still has it will see MISSED set + * when testing it in qdisc_run_end() */ if (!spin_trylock(&qdisc->seqlock)) return false; @@ -220,6 +200,12 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); + /* spin_unlock() only has store-release semantic. The unlock + * and test_bit() ordering is a store-load ordering, so a full + * memory barrier is needed here. + */ + smp_mb(); + if (unlikely(test_bit(__QDISC_STATE_MISSED, &qdisc->state))) { clear_bit(__QDISC_STATE_MISSED, &qdisc->state); diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index d7d2495f83c2..dac91aa38c5a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -4,8 +4,8 @@ #include -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); diff --git a/include/net/sock.h b/include/net/sock.h index eb64ca0f01f3..0be6e328af74 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1466,7 +1466,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long val = sk->sk_prot->sysctl_mem[index]; + long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); #if PAGE_SIZE > SK_MEM_QUANTUM val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; diff --git a/include/net/tcp.h b/include/net/tcp.h index 820773c029d6..04294585691d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1385,8 +1385,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || - ca_ops->cong_control) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || + tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) @@ -1401,7 +1401,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); return tcp_adv_win_scale <= 0 ? (space>>(-tcp_adv_win_scale)) : @@ -1452,21 +1452,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; + return tp->keepalive_intvl ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; + return tp->keepalive_time ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes; + return tp->keepalive_probes ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) @@ -1479,7 +1482,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) @@ -1974,7 +1978,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; + return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } /* @wake is one when sk_stream_write_space() calls us. diff --git a/include/net/tls.h b/include/net/tls.h index c875c23ad173..419a67c50fc2 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -721,7 +721,7 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_crypto_info *crypto_info); #ifdef CONFIG_TLS_DEVICE -void tls_device_init(void); +int tls_device_init(void); void tls_device_cleanup(void); void tls_device_sk_destruct(struct sock *sk); int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); @@ -741,7 +741,7 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) return tls_get_ctx(sk)->rx_conf == TLS_HW; } #else -static inline void tls_device_init(void) {} +static inline int tls_device_init(void) { return 0; } static inline void tls_device_cleanup(void) {} static inline int diff --git a/include/net/udp.h b/include/net/udp.h index 4017f257628f..010bc324f860 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -259,7 +259,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index fac8e89aed81..310e0dbffda9 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -249,7 +249,8 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *, struct fc_frame *); /* libfcoe funcs */ -u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], unsigned int, unsigned int); +u64 fcoe_wwn_from_mac(unsigned char mac[ETH_ALEN], unsigned int scheme, + unsigned int port); int fcoe_libfc_config(struct fc_lport *, struct fcoe_ctlr *, const struct libfc_function_template *, int init_fcp); u32 fcoe_fc_crc(struct fc_frame *fp); diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h index ab69434e2329..72e785a903b6 100644 --- a/include/trace/events/libata.h +++ b/include/trace/events/libata.h @@ -249,6 +249,7 @@ DECLARE_EVENT_CLASS(ata_qc_complete_template, __entry->hob_feature = qc->result_tf.hob_feature; __entry->nsect = qc->result_tf.nsect; __entry->hob_nsect = qc->result_tf.hob_nsect; + __entry->flags = qc->flags; ), TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \ diff --git a/include/trace/events/random.h b/include/trace/events/random.h deleted file mode 100644 index 9570a10cb949..000000000000 --- a/include/trace/events/random.h +++ /dev/null @@ -1,330 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM random - -#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_RANDOM_H - -#include -#include - -TRACE_EVENT(add_device_randomness, - TP_PROTO(int bytes, unsigned long IP), - - TP_ARGS(bytes, IP), - - TP_STRUCT__entry( - __field( int, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("bytes %d caller %pS", - __entry->bytes, (void *)__entry->IP) -); - -DECLARE_EVENT_CLASS(random__mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), - - TP_ARGS(pool_name, bytes, IP), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, bytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->bytes = bytes; - __entry->IP = IP; - ), - - TP_printk("%s pool: bytes %d caller %pS", - __entry->pool_name, __entry->bytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), - - TP_ARGS(pool_name, bytes, IP) -); - -DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, - TP_PROTO(const char *pool_name, int bytes, unsigned long IP), - - TP_ARGS(pool_name, bytes, IP) -); - -TRACE_EVENT(credit_entropy_bits, - TP_PROTO(const char *pool_name, int bits, int entropy_count, - unsigned long IP), - - TP_ARGS(pool_name, bits, entropy_count, IP), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, bits ) - __field( int, entropy_count ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->bits = bits; - __entry->entropy_count = entropy_count; - __entry->IP = IP; - ), - - TP_printk("%s pool: bits %d entropy_count %d caller %pS", - __entry->pool_name, __entry->bits, - __entry->entropy_count, (void *)__entry->IP) -); - -TRACE_EVENT(push_to_pool, - TP_PROTO(const char *pool_name, int pool_bits, int input_bits), - - TP_ARGS(pool_name, pool_bits, input_bits), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, pool_bits ) - __field( int, input_bits ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->pool_bits = pool_bits; - __entry->input_bits = input_bits; - ), - - TP_printk("%s: pool_bits %d input_pool_bits %d", - __entry->pool_name, __entry->pool_bits, - __entry->input_bits) -); - -TRACE_EVENT(debit_entropy, - TP_PROTO(const char *pool_name, int debit_bits), - - TP_ARGS(pool_name, debit_bits), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, debit_bits ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->debit_bits = debit_bits; - ), - - TP_printk("%s: debit_bits %d", __entry->pool_name, - __entry->debit_bits) -); - -TRACE_EVENT(add_input_randomness, - TP_PROTO(int input_bits), - - TP_ARGS(input_bits), - - TP_STRUCT__entry( - __field( int, input_bits ) - ), - - TP_fast_assign( - __entry->input_bits = input_bits; - ), - - TP_printk("input_pool_bits %d", __entry->input_bits) -); - -TRACE_EVENT(add_disk_randomness, - TP_PROTO(dev_t dev, int input_bits), - - TP_ARGS(dev, input_bits), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, input_bits ) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->input_bits = input_bits; - ), - - TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->input_bits) -); - -TRACE_EVENT(xfer_secondary_pool, - TP_PROTO(const char *pool_name, int xfer_bits, int request_bits, - int pool_entropy, int input_entropy), - - TP_ARGS(pool_name, xfer_bits, request_bits, pool_entropy, - input_entropy), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, xfer_bits ) - __field( int, request_bits ) - __field( int, pool_entropy ) - __field( int, input_entropy ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->xfer_bits = xfer_bits; - __entry->request_bits = request_bits; - __entry->pool_entropy = pool_entropy; - __entry->input_entropy = input_entropy; - ), - - TP_printk("pool %s xfer_bits %d request_bits %d pool_entropy %d " - "input_entropy %d", __entry->pool_name, __entry->xfer_bits, - __entry->request_bits, __entry->pool_entropy, - __entry->input_entropy) -); - -DECLARE_EVENT_CLASS(random__get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP), - - TP_STRUCT__entry( - __field( int, nbytes ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->nbytes = nbytes; - __entry->IP = IP; - ), - - TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes, - TP_PROTO(int nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, - TP_PROTO(int nbytes, unsigned long IP), - - TP_ARGS(nbytes, IP) -); - -DECLARE_EVENT_CLASS(random__extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), - - TP_ARGS(pool_name, nbytes, entropy_count, IP), - - TP_STRUCT__entry( - __field( const char *, pool_name ) - __field( int, nbytes ) - __field( int, entropy_count ) - __field(unsigned long, IP ) - ), - - TP_fast_assign( - __entry->pool_name = pool_name; - __entry->nbytes = nbytes; - __entry->entropy_count = entropy_count; - __entry->IP = IP; - ), - - TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", - __entry->pool_name, __entry->nbytes, __entry->entropy_count, - (void *)__entry->IP) -); - - -DEFINE_EVENT(random__extract_entropy, extract_entropy, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), - - TP_ARGS(pool_name, nbytes, entropy_count, IP) -); - -DEFINE_EVENT(random__extract_entropy, extract_entropy_user, - TP_PROTO(const char *pool_name, int nbytes, int entropy_count, - unsigned long IP), - - TP_ARGS(pool_name, nbytes, entropy_count, IP) -); - -TRACE_EVENT(random_read, - TP_PROTO(int got_bits, int need_bits, int pool_left, int input_left), - - TP_ARGS(got_bits, need_bits, pool_left, input_left), - - TP_STRUCT__entry( - __field( int, got_bits ) - __field( int, need_bits ) - __field( int, pool_left ) - __field( int, input_left ) - ), - - TP_fast_assign( - __entry->got_bits = got_bits; - __entry->need_bits = need_bits; - __entry->pool_left = pool_left; - __entry->input_left = input_left; - ), - - TP_printk("got_bits %d still_needed_bits %d " - "blocking_pool_entropy_left %d input_entropy_left %d", - __entry->got_bits, __entry->got_bits, __entry->pool_left, - __entry->input_left) -); - -TRACE_EVENT(urandom_read, - TP_PROTO(int got_bits, int pool_left, int input_left), - - TP_ARGS(got_bits, pool_left, input_left), - - TP_STRUCT__entry( - __field( int, got_bits ) - __field( int, pool_left ) - __field( int, input_left ) - ), - - TP_fast_assign( - __entry->got_bits = got_bits; - __entry->pool_left = pool_left; - __entry->input_left = input_left; - ), - - TP_printk("got_bits %d nonblocking_pool_entropy_left %d " - "input_entropy_left %d", __entry->got_bits, - __entry->pool_left, __entry->input_left) -); - -TRACE_EVENT(prandom_u32, - - TP_PROTO(unsigned int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( unsigned int, ret) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret=%u" , __entry->ret) -); - -#endif /* _TRACE_RANDOM_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 4a3ab0ed6e06..1c714336b863 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1509,7 +1509,7 @@ TRACE_EVENT(rxrpc_call_reset, __entry->call_serial = call->rx_serial; __entry->conn_serial = call->conn->hi_serial; __entry->tx_seq = call->tx_hard_ack; - __entry->rx_seq = call->ackr_seen; + __entry->rx_seq = call->rx_hard_ack; ), TP_printk("c=%08x %08x:%08x r=%08x/%08x tx=%08x rx=%08x", diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index a966d4b5ab37..905b151bc3dd 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_STRUCT__entry( __array(char, name, 32) - __field(long *, sysctl_mem) + __array(long, sysctl_mem, 3) __field(long, allocated) __field(int, sysctl_rmem) __field(int, rmem_alloc) @@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_fast_assign( strncpy(__entry->name, prot->name, 32); - __entry->sysctl_mem = prot->sysctl_mem; + __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); + __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); + __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); __entry->allocated = allocated; __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 2070df64958e..b4feeb4b216a 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -283,7 +283,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __field(unsigned long, nr_scanned) __field(unsigned long, nr_skipped) __field(unsigned long, nr_taken) - __field(isolate_mode_t, isolate_mode) + __field(unsigned int, isolate_mode) __field(int, lru) ), @@ -294,7 +294,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __entry->nr_scanned = nr_scanned; __entry->nr_skipped = nr_skipped; __entry->nr_taken = nr_taken; - __entry->isolate_mode = isolate_mode; + __entry->isolate_mode = (__force unsigned int)isolate_mode; __entry->lru = lru; ), diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f39fdcb2273..2a234023821e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5007,7 +5007,10 @@ struct bpf_pidns_info { /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ struct bpf_sk_lookup { - __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + union { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ + }; __u32 family; /* Protocol family (AF_INET, AF_INET6) */ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 7f30393b92c3..f76d11725c6c 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -44,7 +44,7 @@ struct dma_buf_sync { * between them in actual uapi, they're just different numbers. */ #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) -#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) -#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) +#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) +#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) #endif diff --git a/include/video/of_display_timing.h b/include/video/of_display_timing.h index e1126a74882a..eff166fdd81b 100644 --- a/include/video/of_display_timing.h +++ b/include/video/of_display_timing.h @@ -8,6 +8,8 @@ #ifndef __LINUX_OF_DISPLAY_TIMING_H #define __LINUX_OF_DISPLAY_TIMING_H +#include + struct device_node; struct display_timing; struct display_timings; diff --git a/init/Kconfig b/init/Kconfig index ee9b2396972d..6c119a4010ee 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -68,6 +68,11 @@ config CC_HAS_ASM_GOTO_OUTPUT depends on CC_HAS_ASM_GOTO def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) +config CC_HAS_ASM_GOTO_TIED_OUTPUT + depends on CC_HAS_ASM_GOTO_OUTPUT + # Detect buggy gcc and clang, fixed in gcc-11 clang-14. + def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .\n": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) + config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) diff --git a/init/main.c b/init/main.c index e0ad3b830313..e0033a6bcdf1 100644 --- a/init/main.c +++ b/init/main.c @@ -947,21 +947,18 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) softirq_init(); timekeeping_init(); kfence_init(); + time_init(); /* * For best initial stack canary entropy, prepare it after: * - setup_arch() for any UEFI RNG entropy and boot cmdline access - * - timekeeping_init() for ktime entropy used in rand_initialize() - * - rand_initialize() to get any arch-specific entropy like RDRAND - * - add_latent_entropy() to get any latent entropy - * - adding command line entropy + * - timekeeping_init() for ktime entropy used in random_init() + * - time_init() for making random_get_entropy() work on some platforms + * - random_init() to initialize the RNG from from early entropy sources */ - rand_initialize(); - add_latent_entropy(); - add_device_randomness(command_line, strlen(command_line)); + random_init(command_line); boot_init_stack_canary(); - time_init(); perf_event_init(); profile_init(); call_function_init(); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 05d2176cc471..86969de17084 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -45,6 +45,7 @@ struct mqueue_fs_context { struct ipc_namespace *ipc_ns; + bool newns; /* Set if newly created ipc namespace */ }; #define MQUEUE_MAGIC 0x19800202 @@ -424,6 +425,14 @@ static int mqueue_get_tree(struct fs_context *fc) { struct mqueue_fs_context *ctx = fc->fs_private; + /* + * With a newly created ipc namespace, we don't need to do a search + * for an ipc namespace match, but we still need to set s_fs_info. + */ + if (ctx->newns) { + fc->s_fs_info = ctx->ipc_ns; + return get_tree_nodev(fc, mqueue_fill_super); + } return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns); } @@ -451,6 +460,10 @@ static int mqueue_init_fs_context(struct fs_context *fc) return 0; } +/* + * mq_init_ns() is currently the only caller of mq_create_mount(). + * So the ns parameter is always a newly created ipc namespace. + */ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) { struct mqueue_fs_context *ctx; @@ -462,6 +475,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) return ERR_CAST(fc); ctx = fc->fs_private; + ctx->newns = true; put_ipc_ns(ctx->ipc_ns); ctx->ipc_ns = get_ipc_ns(ns); put_user_ns(fc->user_ns); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 41fc5020814e..234cf1cd08d0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -68,11 +68,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns { u8 *ptr = NULL; - if (k >= SKF_NET_OFF) + if (k >= SKF_NET_OFF) { ptr = skb_network_header(skb) + k - SKF_NET_OFF; - else if (k >= SKF_LL_OFF) + } else if (k >= SKF_LL_OFF) { + if (unlikely(!skb_mac_header_was_set(skb))) + return NULL; ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - + } if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; @@ -1657,6 +1659,11 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) CONT; \ LDX_MEM_##SIZEOP: \ DST = *(SIZE *)(unsigned long) (SRC + insn->off); \ + CONT; \ + LDX_PROBE_MEM_##SIZEOP: \ + bpf_probe_read_kernel(&DST, sizeof(SIZE), \ + (const void *)(long) (SRC + insn->off)); \ + DST = *((SIZE *)&DST); \ CONT; LDST(B, u8) @@ -1664,15 +1671,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) LDST(W, u32) LDST(DW, u64) #undef LDST -#define LDX_PROBE(SIZEOP, SIZE) \ - LDX_PROBE_MEM_##SIZEOP: \ - bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \ - CONT; - LDX_PROBE(B, 1) - LDX_PROBE(H, 2) - LDX_PROBE(W, 4) - LDX_PROBE(DW, 8) -#undef LDX_PROBE STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ atomic_add((u32) SRC, (atomic_t *)(unsigned long) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 4575d2d60cb1..0c5bf98d5576 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -121,8 +121,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) return ERR_PTR(-E2BIG); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); - cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); - err = bpf_map_charge_init(&mem, cost); + err = bpf_map_charge_init(&mem, cost + attr->max_entries * + (sizeof(struct stack_map_bucket) + (u64)value_size)); if (err) return ERR_PTR(err); diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index ac34532150f7..3988776e1628 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -381,7 +381,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; int err = 0; - int cnt; + int cnt = 0, i; kind = bpf_attach_type_to_tramp(prog); mutex_lock(&tr->mutex); @@ -392,7 +392,10 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) err = -EBUSY; goto out; } - cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]; + + for (i = 0; i < BPF_TRAMP_MAX; i++) + cnt += tr->progs_cnt[i]; + if (kind == BPF_TRAMP_REPLACE) { /* Cannot attach extension if fentry/fexit are in use. */ if (cnt) { @@ -470,16 +473,19 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, void bpf_trampoline_put(struct bpf_trampoline *tr) { + int i; + if (!tr) return; mutex_lock(&trampoline_mutex); if (!refcount_dec_and_test(&tr->refcnt)) goto out; WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); - if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) - goto out; - if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) - goto out; + + for (i = 0; i < BPF_TRAMP_MAX; i++) + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i]))) + goto out; + /* This code will be executed even when the last bpf_tramp_image * is alive. All progs are detached from the trampoline and the * trampoline image is patched with jmp into epilogue to skip diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cec01739a47c..dd7199320ebd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1249,6 +1249,21 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); } +static void reg_bounds_sync(struct bpf_reg_state *reg) +{ + /* We might have learned new bounds from the var_off. */ + __update_reg_bounds(reg); + /* We might have learned something about the sign bit. */ + __reg_deduce_bounds(reg); + /* We might have learned some bits from the bounds. */ + __reg_bound_offset(reg); + /* Intersecting with the old var_off might have improved our bounds + * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), + * then new var_off is (0; 0x7f...fc) which improves our umax. + */ + __update_reg_bounds(reg); +} + static bool __reg32_bound_s64(s32 a) { return a >= 0 && a <= S32_MAX; @@ -1290,16 +1305,8 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) * so they do not impact tnum bounds calculation. */ __mark_reg64_unbounded(reg); - __update_reg_bounds(reg); } - - /* Intersecting with the old var_off might have improved our bounds - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), - * then new var_off is (0; 0x7f...fc) which improves our umax. - */ - __reg_deduce_bounds(reg); - __reg_bound_offset(reg); - __update_reg_bounds(reg); + reg_bounds_sync(reg); } static bool __reg64_bound_s32(s64 a) @@ -1315,7 +1322,6 @@ static bool __reg64_bound_u32(u64 a) static void __reg_combine_64_into_32(struct bpf_reg_state *reg) { __mark_reg32_unbounded(reg); - if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { reg->s32_min_value = (s32)reg->smin_value; reg->s32_max_value = (s32)reg->smax_value; @@ -1324,14 +1330,7 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg) reg->u32_min_value = (u32)reg->umin_value; reg->u32_max_value = (u32)reg->umax_value; } - - /* Intersecting with the old var_off might have improved our bounds - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), - * then new var_off is (0; 0x7f...fc) which improves our umax. - */ - __reg_deduce_bounds(reg); - __reg_bound_offset(reg); - __update_reg_bounds(reg); + reg_bounds_sync(reg); } /* Mark a register as having a completely unknown (scalar) value. */ @@ -5230,9 +5229,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, ret_reg->s32_max_value = meta->msize_max_value; ret_reg->smin_value = -MAX_ERRNO; ret_reg->s32_min_value = -MAX_ERRNO; - __reg_deduce_bounds(ret_reg); - __reg_bound_offset(ret_reg); - __update_reg_bounds(ret_reg); + reg_bounds_sync(ret_reg); } static int @@ -6197,11 +6194,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) return -EINVAL; - - __update_reg_bounds(dst_reg); - __reg_deduce_bounds(dst_reg); - __reg_bound_offset(dst_reg); - + reg_bounds_sync(dst_reg); if (sanitize_check_bounds(env, insn, dst_reg) < 0) return -EACCES; if (sanitize_needed(opcode)) { @@ -6939,10 +6932,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, /* ALU32 ops are zero extended into 64bit register */ if (alu32) zext_32_to_64(dst_reg); - - __update_reg_bounds(dst_reg); - __reg_deduce_bounds(dst_reg); - __reg_bound_offset(dst_reg); + reg_bounds_sync(dst_reg); return 0; } @@ -7131,10 +7121,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->dst_reg); } zext_32_to_64(dst_reg); - - __update_reg_bounds(dst_reg); - __reg_deduce_bounds(dst_reg); - __reg_bound_offset(dst_reg); + reg_bounds_sync(dst_reg); } } else { /* case: R = imm @@ -7512,26 +7499,33 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, return; switch (opcode) { + /* JEQ/JNE comparison doesn't change the register equivalence. + * + * r1 = r2; + * if (r1 == 42) goto label; + * ... + * label: // here both r1 and r2 are known to be 42. + * + * Hence when marking register as known preserve it's ID. + */ case BPF_JEQ: - case BPF_JNE: - { - struct bpf_reg_state *reg = - opcode == BPF_JEQ ? true_reg : false_reg; - - /* JEQ/JNE comparison doesn't change the register equivalence. - * r1 = r2; - * if (r1 == 42) goto label; - * ... - * label: // here both r1 and r2 are known to be 42. - * - * Hence when marking register as known preserve it's ID. - */ - if (is_jmp32) - __mark_reg32_known(reg, val32); - else - ___mark_reg_known(reg, val); + if (is_jmp32) { + __mark_reg32_known(true_reg, val32); + true_32off = tnum_subreg(true_reg->var_off); + } else { + ___mark_reg_known(true_reg, val); + true_64off = true_reg->var_off; + } + break; + case BPF_JNE: + if (is_jmp32) { + __mark_reg32_known(false_reg, val32); + false_32off = tnum_subreg(false_reg->var_off); + } else { + ___mark_reg_known(false_reg, val); + false_64off = false_reg->var_off; + } break; - } case BPF_JSET: if (is_jmp32) { false_32off = tnum_and(false_32off, tnum_const(~val32)); @@ -7686,21 +7680,8 @@ static void __reg_combine_min_max(struct bpf_reg_state *src_reg, dst_reg->smax_value); src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, dst_reg->var_off); - /* We might have learned new bounds from the var_off. */ - __update_reg_bounds(src_reg); - __update_reg_bounds(dst_reg); - /* We might have learned something about the sign bit. */ - __reg_deduce_bounds(src_reg); - __reg_deduce_bounds(dst_reg); - /* We might have learned some bits from the bounds. */ - __reg_bound_offset(src_reg); - __reg_bound_offset(dst_reg); - /* Intersecting with the old var_off might have improved our bounds - * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), - * then new var_off is (0; 0x7f...fc) which improves our umax. - */ - __update_reg_bounds(src_reg); - __update_reg_bounds(dst_reg); + reg_bounds_sync(src_reg); + reg_bounds_sync(dst_reg); } static void reg_combine_min_max(struct bpf_reg_state *true_src, diff --git a/kernel/cpu.c b/kernel/cpu.c index d49ba9df8f06..90d09bafecf6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1861,6 +1862,20 @@ core_initcall(cpu_hotplug_pm_sync_init); int __boot_cpu_id; +/* Horrific hacks because we can't add more to cpuhp_hp_states. */ +static int random_and_perf_prepare_fusion(unsigned int cpu) +{ + perf_event_init_cpu(cpu); + random_prepare_cpu(cpu); + return 0; +} +static int random_and_workqueue_online_fusion(unsigned int cpu) +{ + workqueue_online_cpu(cpu); + random_online_cpu(cpu); + return 0; +} + #endif /* CONFIG_SMP */ /* Boot processor state steps */ @@ -1879,7 +1894,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { }, [CPUHP_PERF_PREPARE] = { .name = "perf:prepare", - .startup.single = perf_event_init_cpu, + .startup.single = random_and_perf_prepare_fusion, .teardown.single = perf_event_exit_cpu, }, [CPUHP_WORKQUEUE_PREP] = { @@ -1995,7 +2010,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { }, [CPUHP_AP_WORKQUEUE_ONLINE] = { .name = "workqueue:online", - .startup.single = workqueue_online_cpu, + .startup.single = random_and_workqueue_online_fusion, .teardown.single = workqueue_offline_cpu, }, [CPUHP_AP_RCUTREE_ONLINE] = { diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 8661eb2b1771..0f31b22abe8d 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -756,6 +757,29 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, continue; kgdb_connected = 0; } else { + /* + * This is a brutal way to interfere with the debugger + * and prevent gdb being used to poke at kernel memory. + * This could cause trouble if lockdown is applied when + * there is already an active gdb session. For now the + * answer is simply "don't do that". Typically lockdown + * *will* be applied before the debug core gets started + * so only developers using kgdb for fairly advanced + * early kernel debug can be biten by this. Hopefully + * they are sophisticated enough to take care of + * themselves, especially with help from the lockdown + * message printed on the console! + */ + if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) { + if (IS_ENABLED(CONFIG_KGDB_KDB)) { + /* Switch back to kdb if possible... */ + dbg_kdb_mode = 1; + continue; + } else { + /* ... otherwise just bail */ + break; + } + } error = gdb_serial_stub(ks); } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 930ac1b25ec7..4e09fab52faf 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "kdb_private.h" #undef MODULE_PARAM_PREFIX @@ -197,10 +198,62 @@ struct task_struct *kdb_curr_task(int cpu) } /* - * Check whether the flags of the current command and the permissions - * of the kdb console has allow a command to be run. + * Update the permissions flags (kdb_cmd_enabled) to match the + * current lockdown state. + * + * Within this function the calls to security_locked_down() are "lazy". We + * avoid calling them if the current value of kdb_cmd_enabled already excludes + * flags that might be subject to lockdown. Additionally we deliberately check + * the lockdown flags independently (even though read lockdown implies write + * lockdown) since that results in both simpler code and clearer messages to + * the user on first-time debugger entry. + * + * The permission masks during a read+write lockdown permits the following + * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE). + * + * The INSPECT commands are not blocked during lockdown because they are + * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes + * forcing them to have no arguments) and lsmod. These commands do expose + * some kernel state but do not allow the developer seated at the console to + * choose what state is reported. SIGNAL and REBOOT should not be controversial, + * given these are allowed for root during lockdown already. */ -static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, +static void kdb_check_for_lockdown(void) +{ + const int write_flags = KDB_ENABLE_MEM_WRITE | + KDB_ENABLE_REG_WRITE | + KDB_ENABLE_FLOW_CTRL; + const int read_flags = KDB_ENABLE_MEM_READ | + KDB_ENABLE_REG_READ; + + bool need_to_lockdown_write = false; + bool need_to_lockdown_read = false; + + if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags)) + need_to_lockdown_write = + security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL); + + if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags)) + need_to_lockdown_read = + security_locked_down(LOCKDOWN_DBG_READ_KERNEL); + + /* De-compose KDB_ENABLE_ALL if required */ + if (need_to_lockdown_write || need_to_lockdown_read) + if (kdb_cmd_enabled & KDB_ENABLE_ALL) + kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL; + + if (need_to_lockdown_write) + kdb_cmd_enabled &= ~write_flags; + + if (need_to_lockdown_read) + kdb_cmd_enabled &= ~read_flags; +} + +/* + * Check whether the flags of the current command, the permissions of the kdb + * console and the lockdown state allow a command to be run. + */ +static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, bool no_args) { /* permissions comes from userspace so needs massaging slightly */ @@ -1194,6 +1247,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_curr_task(raw_smp_processor_id()); KDB_DEBUG_STATE("kdb_local 1", reason); + + kdb_check_for_lockdown(); + kdb_go_count = 0; if (reason == KDB_REASON_DEBUG) { /* special case below */ diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 6bca5077ee71..4833c6f7d3d0 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -448,7 +448,7 @@ void debug_dma_dump_mappings(struct device *dev) * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. */ -static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) @@ -564,7 +564,7 @@ static void add_dma_entry(struct dma_debug_entry *entry) rc = active_cacheline_insert(entry); if (rc == -ENOMEM) { - pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); + pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 73a03f8628be..2be1d337a708 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -190,7 +190,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, goto out_free_pages; if (force_dma_unencrypted(dev)) { err = set_memory_decrypted((unsigned long)ret, - 1 << get_order(size)); + PFN_UP(size)); if (err) goto out_free_pages; } @@ -212,7 +212,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, ret = page_address(page); if (force_dma_unencrypted(dev)) { err = set_memory_decrypted((unsigned long)ret, - 1 << get_order(size)); + PFN_UP(size)); if (err) goto out_free_pages; } @@ -233,7 +233,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, out_encrypt_pages: if (force_dma_unencrypted(dev)) { err = set_memory_encrypted((unsigned long)page_address(page), - 1 << get_order(size)); + PFN_UP(size)); /* If memory cannot be re-encrypted, it must be leaked */ if (err) return NULL; @@ -246,8 +246,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - unsigned int page_order = get_order(size); - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && !force_dma_unencrypted(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ @@ -268,7 +266,7 @@ void dma_direct_free(struct device *dev, size_t size, return; if (force_dma_unencrypted(dev)) - set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); + set_memory_encrypted((unsigned long)cpu_addr, PFN_UP(size)); if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) vunmap(cpu_addr); @@ -304,8 +302,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, ret = page_address(page); if (force_dma_unencrypted(dev)) { - if (set_memory_decrypted((unsigned long)ret, - 1 << get_order(size))) + if (set_memory_decrypted((unsigned long)ret, PFN_UP(size))) goto out_free_pages; } memset(ret, 0, size); @@ -320,7 +317,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir) { - unsigned int page_order = get_order(size); void *vaddr = page_address(page); /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ @@ -329,7 +325,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, return; if (force_dma_unencrypted(dev)) - set_memory_encrypted((unsigned long)vaddr, 1 << page_order); + set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); dma_free_contiguous(dev, page, size); } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 62b1e5fa8673..274587a57717 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -597,10 +597,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i); tlb_addr = slot_addr(io_tlb_start, index) + offset; - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || - dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); + /* + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig + * to the tlb buffer, if we knew for sure the device will + * overwirte the entire current content. But we don't. Thus + * unconditional bounce may prevent leaking swiotlb content (i.e. + * kernel memory) to user-space. + */ + swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); return tlb_addr; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 892478138dc9..167fff12e3b8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6182,10 +6182,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!atomic_inc_not_zero(&event->rb->mmap_count)) { /* - * Raced against perf_mmap_close() through - * perf_event_set_output(). Try again, hope for better - * luck. + * Raced against perf_mmap_close(); remove the + * event and try again. */ + ring_buffer_attach(event, NULL); mutex_unlock(&event->mmap_mutex); goto again; } @@ -11542,14 +11542,25 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, goto out; } +static void mutex_lock_double(struct mutex *a, struct mutex *b) +{ + if (b < a) + swap(a, b); + + mutex_lock(a); + mutex_lock_nested(b, SINGLE_DEPTH_NESTING); +} + static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event) { struct perf_buffer *rb = NULL; int ret = -EINVAL; - if (!output_event) + if (!output_event) { + mutex_lock(&event->mmap_mutex); goto set; + } /* don't allow circular references */ if (event == output_event) @@ -11587,8 +11598,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) event->pmu != output_event->pmu) goto out; + /* + * Hold both mmap_mutex to serialize against perf_mmap_close(). Since + * output_event is already on rb->event_list, and the list iteration + * restarts after every removal, it is guaranteed this new event is + * observed *OR* if output_event is already removed, it's guaranteed we + * observe !rb->mmap_count. + */ + mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); set: - mutex_lock(&event->mmap_mutex); /* Can't redirect output if we've got an active mmap() */ if (atomic_read(&event->mmap_count)) goto unlock; @@ -11598,6 +11616,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) rb = ring_buffer_get(output_event); if (!rb) goto unlock; + + /* did we race against perf_mmap_close() */ + if (!atomic_read(&rb->mmap_count)) { + ring_buffer_put(rb); + goto unlock; + } } ring_buffer_attach(event, rb); @@ -11605,20 +11629,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) ret = 0; unlock: mutex_unlock(&event->mmap_mutex); + if (output_event) + mutex_unlock(&output_event->mmap_mutex); out: return ret; } -static void mutex_lock_double(struct mutex *a, struct mutex *b) -{ - if (b < a) - swap(a, b); - - mutex_lock(a); - mutex_lock_nested(b, SINGLE_DEPTH_NESTING); -} - static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) { bool nmi_safe = false; @@ -11901,6 +11918,9 @@ SYSCALL_DEFINE5(perf_event_open, * Do not allow to attach to a group in a different task * or CPU context. If we're moving SW events, we'll fix * this up later, so allow that. + * + * Racy, not holding group_leader->ctx->mutex, see comment with + * perf_event_ctx_lock(). */ if (!move_group && group_leader->ctx != ctx) goto err_context; @@ -11968,6 +11988,7 @@ SYSCALL_DEFINE5(perf_event_open, } else { perf_event_ctx_unlock(group_leader, gctx); move_group = 0; + goto not_move_group; } } @@ -11984,7 +12005,17 @@ SYSCALL_DEFINE5(perf_event_open, } } else { mutex_lock(&ctx->mutex); + + /* + * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx, + * see the group_leader && !move_group test earlier. + */ + if (group_leader && group_leader->ctx != ctx) { + err = -EINVAL; + goto err_locked; + } } +not_move_group: if (ctx->task == TASK_TOMBSTONE) { err = -ESRCH; diff --git a/kernel/exit.c b/kernel/exit.c index f61ac326daab..1410b68327a4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -784,7 +784,7 @@ void __noreturn do_exit(long code) #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); - exit_itimers(tsk->signal); + exit_itimers(tsk); #endif if (tsk->mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 762a928e18f9..8806444a6855 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -195,7 +195,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) retval = __handle_irq_event_percpu(desc, &flags); - add_interrupt_randomness(desc->irq_data.irq, flags); + add_interrupt_randomness(desc->irq_data.irq); if (!noirqdebug) note_interrupt(desc, retval); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index aea9104265f2..fff11916aba3 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -29,6 +29,15 @@ #include #include "kexec_internal.h" +#ifdef CONFIG_KEXEC_SIG +static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE); + +void set_kexec_sig_enforced(void) +{ + sig_enforce = true; +} +#endif + static int kexec_calculate_store_digests(struct kimage *image); /* @@ -108,40 +117,6 @@ int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, } #endif -/* - * arch_kexec_apply_relocations_add - apply relocations of type RELA - * @pi: Purgatory to be relocated. - * @section: Section relocations applying to. - * @relsec: Section containing RELAs. - * @symtab: Corresponding symtab. - * - * Return: 0 on success, negative errno on error. - */ -int __weak -arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, - const Elf_Shdr *relsec, const Elf_Shdr *symtab) -{ - pr_err("RELA relocation unsupported.\n"); - return -ENOEXEC; -} - -/* - * arch_kexec_apply_relocations - apply relocations of type REL - * @pi: Purgatory to be relocated. - * @section: Section relocations applying to. - * @relsec: Section containing RELs. - * @symtab: Corresponding symtab. - * - * Return: 0 on success, negative errno on error. - */ -int __weak -arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section, - const Elf_Shdr *relsec, const Elf_Shdr *symtab) -{ - pr_err("REL relocation unsupported.\n"); - return -ENOEXEC; -} - /* * Free up memory used by kernel, initrd, and command line. This is temporary * memory allocation which is not needed any more after these buffers have @@ -193,7 +168,7 @@ kimage_validate_signature(struct kimage *image) image->kernel_buf_len); if (ret) { - if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) { + if (sig_enforce) { pr_notice("Enforced kernel signature verification failed (%d).\n", ret); return ret; } diff --git a/kernel/module.c b/kernel/module.c index 7db4c615143a..91b50848b699 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2297,6 +2297,15 @@ void *__symbol_get(const char *symbol) } EXPORT_SYMBOL_GPL(__symbol_get); +static bool module_init_layout_section(const char *sname) +{ +#ifndef CONFIG_MODULE_UNLOAD + if (module_exit_section(sname)) + return true; +#endif + return module_init_section(sname); +} + /* * Ensure that an exported symbol [global namespace] does not already exist * in the kernel or in some other module's exported symbol table. @@ -2506,7 +2515,7 @@ static void layout_sections(struct module *mod, struct load_info *info) if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || module_init_section(sname)) + || module_init_layout_section(sname)) continue; s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i); pr_debug("\t%s\n", sname); @@ -2539,7 +2548,7 @@ static void layout_sections(struct module *mod, struct load_info *info) if ((s->sh_flags & masks[m][0]) != masks[m][0] || (s->sh_flags & masks[m][1]) || s->sh_entsize != ~0UL - || !module_init_section(sname)) + || !module_init_layout_section(sname)) continue; s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i) | INIT_OFFSET_MASK); @@ -3188,11 +3197,6 @@ static int rewrite_section_headers(struct load_info *info, int flags) temporary image. */ shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset; -#ifndef CONFIG_MODULE_UNLOAD - /* Don't load .exit sections */ - if (module_exit_section(info->secstrings+shdr->sh_name)) - shdr->sh_flags &= ~(unsigned long)SHF_ALLOC; -#endif } /* Track but don't keep modinfo and version sections. */ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d99f73f83bf5..aab480e24bd6 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -1219,9 +1219,8 @@ int ptrace_request(struct task_struct *child, long request, return ptrace_resume(child, request, data); case PTRACE_KILL: - if (child->exit_state) /* already dead */ - return 0; - return ptrace_resume(child, request, SIGKILL); + send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); + return 0; #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index b71e21f73c40..cd6e11403f1b 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -86,6 +86,7 @@ config TASKS_RCU config TASKS_RUDE_RCU def_bool 0 + select IRQ_WORK help This option enables a task-based RCU implementation that uses only context switch (including preemption) and user-mode diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 7c05c5ab7865..14af29fe1377 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -620,6 +620,9 @@ static void rcu_tasks_be_rude(struct work_struct *work) // Wait for one rude RCU-tasks grace period. static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp) { + if (num_online_cpus() <= 1) + return; // Fastpath for only one CPU. + rtp->n_ipis += cpumask_weight(cpu_online_mask); schedule_on_each_cpu(rcu_tasks_be_rude); } diff --git a/kernel/scftorture.c b/kernel/scftorture.c index 554a521ee235..060ee0b1569a 100644 --- a/kernel/scftorture.c +++ b/kernel/scftorture.c @@ -253,9 +253,10 @@ static void scf_handler(void *scfc_in) } this_cpu_inc(scf_invoked_count); if (longwait <= 0) { - if (!(r & 0xffc0)) + if (!(r & 0xffc0)) { udelay(r & 0x3f); - goto out; + goto out; + } } if (r & 0xfff) goto out; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 16230441b47a..9e3e113c5e47 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1563,7 +1563,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) * the throttle. */ p->dl.dl_throttled = 0; - BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH); + if (!(flags & ENQUEUE_REPLENISH)) + printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n", + task_pid_nr(p)); + return; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 05682fab3872..bae53499a9d7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4788,8 +4788,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) cfs_rq->throttle_count--; if (!cfs_rq->throttle_count) { - cfs_rq->throttled_clock_task_time += rq_clock_task_mult(rq) - - cfs_rq->throttled_clock_task; + cfs_rq->throttled_clock_pelt_time += rq_clock_task_mult(rq) - + cfs_rq->throttled_clock_pelt; /* Add cfs_rq with already running entity in the list */ if (cfs_rq->nr_running >= 1) @@ -4806,7 +4806,7 @@ static int tg_throttle_down(struct task_group *tg, void *data) /* group is entering throttled state, stop time */ if (!cfs_rq->throttle_count) { - cfs_rq->throttled_clock_task = rq_clock_task_mult(rq); + cfs_rq->throttled_clock_pelt = rq_clock_task_mult(rq); list_del_leaf_cfs_rq(cfs_rq); } cfs_rq->throttle_count++; @@ -5224,7 +5224,7 @@ static void sync_throttle(struct task_group *tg, int cpu) pcfs_rq = tg->parent->cfs_rq[cpu]; cfs_rq->throttle_count = pcfs_rq->throttle_count; - cfs_rq->throttled_clock_task = rq_clock_task_mult(cpu_rq(cpu)); + cfs_rq->throttled_clock_pelt = rq_clock_task_mult(cpu_rq(cpu)); } /* conditionally throttle active cfs_rq's from put_prev_entity() */ diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index 3f00596def11..0245e3e21f93 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -152,9 +152,9 @@ static inline u64 rq_clock_pelt(struct rq *rq) static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) { if (unlikely(cfs_rq->throttle_count)) - return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time; + return cfs_rq->throttled_clock_pelt - cfs_rq->throttled_clock_pelt_time; - return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time; + return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time; } #else static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a3c3bc3ca751..fc03f982eb6a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -609,8 +609,8 @@ struct cfs_rq { s64 runtime_remaining; u64 throttled_clock; - u64 throttled_clock_task; - u64 throttled_clock_task_time; + u64 throttled_clock_pelt; + u64 throttled_clock_pelt_time; int throttled; int throttle_count; struct list_head throttled_list; diff --git a/kernel/signal.c b/kernel/signal.c index 71a550524bd7..c75a1d5f9d51 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1924,12 +1924,12 @@ bool do_notify_parent(struct task_struct *tsk, int sig) bool autoreap = false; u64 utime, stime; - BUG_ON(sig == -1); + WARN_ON_ONCE(sig == -1); - /* do_notify_parent_cldstop should have been called instead. */ - BUG_ON(task_is_stopped_or_traced(tsk)); + /* do_notify_parent_cldstop should have been called instead. */ + WARN_ON_ONCE(task_is_stopped_or_traced(tsk)); - BUG_ON(!tsk->ptrace && + WARN_ON_ONCE(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); /* Wake up all pidfd waiters */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3632cfbc88b0..759215b95545 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -560,14 +560,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, if (*negp) { if (*lvalp > (unsigned long) INT_MAX + 1) return -EINVAL; - *valp = -*lvalp; + WRITE_ONCE(*valp, -*lvalp); } else { if (*lvalp > (unsigned long) INT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } } else { - int val = *valp; + int val = READ_ONCE(*valp); if (val < 0) { *negp = true; *lvalp = -(unsigned long)val; @@ -586,9 +586,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, if (write) { if (*lvalp > UINT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } else { - unsigned int val = *valp; + unsigned int val = READ_ONCE(*valp); *lvalp = (unsigned long)val; } return 0; @@ -962,7 +962,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, if ((param->min && *param->min > tmp) || (param->max && *param->max < tmp)) return -EINVAL; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; @@ -1028,7 +1028,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, (param->max && *param->max < tmp)) return -ERANGE; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; @@ -1196,9 +1196,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, err = -EINVAL; break; } - *i = val; + WRITE_ONCE(*i, val); } else { - val = convdiv * (*i) / convmul; + val = convdiv * READ_ONCE(*i) / convmul; if (!first) proc_put_char(&buffer, &left, '\t'); proc_put_long(&buffer, &left, val, false); @@ -1279,9 +1279,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, if (write) { if (*lvalp > INT_MAX / HZ) return 1; - *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); + if (*negp) + WRITE_ONCE(*valp, -*lvalp * HZ); + else + WRITE_ONCE(*valp, *lvalp * HZ); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; @@ -1327,9 +1330,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, if (jif > INT_MAX) return 1; - *valp = (int)jif; + WRITE_ONCE(*valp, (int)jif); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; @@ -1397,8 +1400,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, * @ppos: the current position in the file * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string. - * The values read are assumed to be in 1/1000 seconds, and + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in 1/1000 seconds, and * are converted into jiffies. * * Returns 0 on success. @@ -2823,6 +2826,17 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two_hundred, }, +#ifdef CONFIG_NUMA + { + .procname = "numa_stat", + .data = &sysctl_vm_numa_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_vm_numa_stat_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", @@ -2839,15 +2853,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &hugetlb_mempolicy_sysctl_handler, }, - { - .procname = "numa_stat", - .data = &sysctl_vm_numa_stat, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = sysctl_vm_numa_stat_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { .procname = "hugetlb_shm_group", diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index dd5697d7347b..b624788023d8 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1051,15 +1051,24 @@ static void itimer_delete(struct k_itimer *timer) } /* - * This is called by do_exit or de_thread, only when there are no more - * references to the shared signal_struct. + * This is called by do_exit or de_thread, only when nobody else can + * modify the signal->posix_timers list. Yet we need sighand->siglock + * to prevent the race with /proc/pid/timers. */ -void exit_itimers(struct signal_struct *sig) +void exit_itimers(struct task_struct *tsk) { + struct list_head timers; struct k_itimer *tmr; - while (!list_empty(&sig->posix_timers)) { - tmr = list_entry(sig->posix_timers.next, struct k_itimer, list); + if (list_empty(&tsk->signal->posix_timers)) + return; + + spin_lock_irq(&tsk->sighand->siglock); + list_replace_init(&tsk->signal->posix_timers, &timers); + spin_unlock_irq(&tsk->sighand->siglock); + + while (!list_empty(&timers)) { + tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); } } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 168fca01e400..07969fa0ab2e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -428,7 +428,6 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask) cpumask_copy(tick_nohz_full_mask, cpumask); tick_nohz_full_running = true; } -EXPORT_SYMBOL_GPL(tick_nohz_full_setup); static int tick_nohz_cpu_down(unsigned int cpu) { diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cc4dc2857a87..e12ce2821dba 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -2378,6 +2379,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc) return 0; } +/** + * random_get_entropy_fallback - Returns the raw clock source value, + * used by random.c for platforms with no valid random_get_entropy(). + */ +unsigned long random_get_entropy_fallback(void) +{ + struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; + struct clocksource *clock = READ_ONCE(tkr->clock); + + if (unlikely(timekeeping_suspended || !clock)) + return 0; + return clock->read(clock); +} +EXPORT_SYMBOL_GPL(random_get_entropy_fallback); /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4a5d35dc490b..a63713dcd05d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4427,7 +4427,7 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, * @ip: The instruction pointer address to remove the data from * * Returns the data if it is found, otherwise NULL. - * Note, if the data pointer is used as the data itself, (see + * Note, if the data pointer is used as the data itself, (see * ftrace_func_mapper_find_ip(), then the return value may be meaningless, * if the data pointer was set to zero. */ @@ -5153,8 +5153,6 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) __add_hash_entry(direct_functions, entry); ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); - if (ret) - remove_hash_entry(direct_functions, entry); if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { ret = register_ftrace_function(&direct_ops); @@ -5163,6 +5161,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) } if (ret) { + remove_hash_entry(direct_functions, entry); kfree(entry); if (!direct->count) { list_del_rcu(&direct->next); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2509e3a3f083..a7c1a703b830 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2785,7 +2785,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); -static DEFINE_SPINLOCK(tracepoint_iter_lock); +static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); static DEFINE_MUTEX(tracepoint_printk_mutex); static void output_printk(struct trace_event_buffer *fbuffer) @@ -2813,14 +2813,14 @@ static void output_printk(struct trace_event_buffer *fbuffer) event = &fbuffer->trace_file->event_call->event; - spin_lock_irqsave(&tracepoint_iter_lock, flags); + raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); trace_seq_init(&iter->seq); iter->ent = fbuffer->entry; event_call->event.funcs->trace(iter, 0, event); trace_seq_putc(&iter->seq, 0); printk("%s", iter->seq.buffer); - spin_unlock_irqrestore(&tracepoint_iter_lock, flags); + raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); } int tracepoint_printk_sysctl(struct ctl_table *table, int write, @@ -5908,12 +5908,18 @@ static void tracing_set_nop(struct trace_array *tr) tr->current_trace = &nop_trace; } +static bool tracer_options_updated; + static void add_tracer_options(struct trace_array *tr, struct tracer *t) { /* Only enable if the directory has been created already. */ if (!tr->dir) return; + /* Only create trace option files after update_tracer_options finish */ + if (!tracer_options_updated) + return; + create_trace_option_files(tr, t); } @@ -8650,6 +8656,7 @@ static void __update_tracer_options(struct trace_array *tr) static void update_tracer_options(struct trace_array *tr) { mutex_lock(&trace_types_lock); + tracer_options_updated = true; __update_tracer_options(tr); mutex_unlock(&trace_types_lock); } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index eb7200699cf6..fd5416829445 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1789,8 +1789,11 @@ static int init_var_ref(struct hist_field *ref_field, return err; free: kfree(ref_field->system); + ref_field->system = NULL; kfree(ref_field->event_name); + ref_field->event_name = NULL; kfree(ref_field->name); + ref_field->name = NULL; goto out; } @@ -3940,6 +3943,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { + kfree(hist_data->attrs->var_defs.name[n_vars]); + hist_data->attrs->var_defs.name[n_vars] = NULL; ret = -ENOMEM; goto free; } diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index dcf1e676797d..d29731a30b8e 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -227,7 +227,7 @@ void __post_watch_notification(struct watch_list *wlist, if (lock_wqueue(wqueue)) { post_one_notification(wqueue, n); - unlock_wqueue(wqueue);; + unlock_wqueue(wqueue); } } @@ -457,6 +457,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) rcu_assign_pointer(watch->queue, wqueue); } +static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue) +{ + const struct cred *cred; + struct watch *w; + + hlist_for_each_entry(w, &wlist->watchers, list_node) { + struct watch_queue *wq = rcu_access_pointer(w->queue); + if (wqueue == wq && watch->id == w->id) + return -EBUSY; + } + + cred = current_cred(); + if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) { + atomic_dec(&cred->user->nr_watches); + return -EAGAIN; + } + + watch->cred = get_cred(cred); + rcu_assign_pointer(watch->watch_list, wlist); + + kref_get(&wqueue->usage); + kref_get(&watch->usage); + hlist_add_head(&watch->queue_node, &wqueue->watches); + hlist_add_head_rcu(&watch->list_node, &wlist->watchers); + return 0; +} + /** * add_watch_to_object - Add a watch on an object to a watch list * @watch: The watch to add @@ -471,34 +498,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) */ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) { - struct watch_queue *wqueue = rcu_access_pointer(watch->queue); - struct watch *w; + struct watch_queue *wqueue; + int ret = -ENOENT; - hlist_for_each_entry(w, &wlist->watchers, list_node) { - struct watch_queue *wq = rcu_access_pointer(w->queue); - if (wqueue == wq && watch->id == w->id) - return -EBUSY; - } - - watch->cred = get_current_cred(); - rcu_assign_pointer(watch->watch_list, wlist); - - if (atomic_inc_return(&watch->cred->user->nr_watches) > - task_rlimit(current, RLIMIT_NOFILE)) { - atomic_dec(&watch->cred->user->nr_watches); - put_cred(watch->cred); - return -EAGAIN; - } + rcu_read_lock(); + wqueue = rcu_access_pointer(watch->queue); if (lock_wqueue(wqueue)) { - kref_get(&wqueue->usage); - kref_get(&watch->usage); - hlist_add_head(&watch->queue_node, &wqueue->watches); + spin_lock(&wlist->lock); + ret = add_one_watch(watch, wlist, wqueue); + spin_unlock(&wlist->lock); unlock_wqueue(wqueue); } - hlist_add_head(&watch->list_node, &wlist->watchers); - return 0; + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(add_watch_to_object); diff --git a/lib/Kconfig b/lib/Kconfig index 0abb7975586c..f9660d0332c6 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -101,6 +101,11 @@ config INDIRECT_PIO When in doubt, say N. +source "lib/crypto/Kconfig" + +config LIB_MEMNEQ + bool + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8fa27693e1bf..b7c4ee764506 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1427,8 +1427,7 @@ config WARN_ALL_UNSEEDED_RANDOM so architecture maintainers really need to do what they can to get the CRNG seeded sooner after the system is booted. However, since users cannot do anything actionable to - address this, by default the kernel will issue only a single - warning for the first use of unseeded randomness. + address this, by default this option is disabled. Say Y here if you want to receive warnings for all uses of unseeded randomness. This will be of use primarily for diff --git a/lib/Makefile b/lib/Makefile index 5550543bfa80..1864a5e89741 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -248,6 +248,7 @@ obj-$(CONFIG_DIMLIB) += dim/ obj-$(CONFIG_SIGNATURE) += digsig.o lib-$(CONFIG_CLZ_TAB) += clz_tab.o +lib-$(CONFIG_LIB_MEMNEQ) += memneq.o obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o @@ -274,7 +275,7 @@ $(foreach file, $(libfdt_files), \ $(eval CFLAGS_$(file) = -I $(srctree)/scripts/dtc/libfdt)) lib-$(CONFIG_LIBFDT) += $(libfdt_files) -lib-$(CONFIG_BOOT_CONFIG) += bootconfig.o +obj-$(CONFIG_BOOT_CONFIG) += bootconfig.o obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 6f4bcf524554..b537a83678e1 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -1462,6 +1462,7 @@ int assoc_array_gc(struct assoc_array *array, struct assoc_array_ptr *cursor, *ptr; struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; unsigned long nr_leaves_on_tree; + bool retained; int keylen, slot, nr_free, next_slot, i; pr_devel("-->%s()\n", __func__); @@ -1538,6 +1539,7 @@ int assoc_array_gc(struct assoc_array *array, goto descend; } +retry_compress: pr_devel("-- compress node %p --\n", new_n); /* Count up the number of empty slots in this node and work out the @@ -1555,6 +1557,7 @@ int assoc_array_gc(struct assoc_array *array, pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); /* See what we can fold in */ + retained = false; next_slot = 0; for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { struct assoc_array_shortcut *s; @@ -1604,9 +1607,14 @@ int assoc_array_gc(struct assoc_array *array, pr_devel("[%d] retain node %lu/%d [nx %d]\n", slot, child->nr_leaves_on_branch, nr_free + 1, next_slot); + retained = true; } } + if (retained && new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { + pr_devel("internal nodes remain despite enough space, retrying\n"); + goto retry_compress; + } pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); nr_leaves_on_tree = new_n->nr_leaves_on_branch; diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 14c032de276e..2082af43d51f 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -comment "Crypto library routines" +menu "Crypto library routines" config CRYPTO_LIB_AES tristate @@ -9,14 +9,14 @@ config CRYPTO_LIB_ARC4 tristate config CRYPTO_ARCH_HAVE_LIB_BLAKE2S - tristate + bool help Declares whether the architecture provides an arch-specific accelerated implementation of the Blake2s library interface, either builtin or as a module. config CRYPTO_LIB_BLAKE2S_GENERIC - tristate + def_bool !CRYPTO_ARCH_HAVE_LIB_BLAKE2S help This symbol can be depended upon by arch implementations of the Blake2s library interface that require the generic code as a @@ -24,15 +24,6 @@ config CRYPTO_LIB_BLAKE2S_GENERIC implementation is enabled, this implementation serves the users of CRYPTO_LIB_BLAKE2S. -config CRYPTO_LIB_BLAKE2S - tristate "BLAKE2s hash function library" - depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S - select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n - help - Enable the Blake2s library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. - config CRYPTO_ARCH_HAVE_LIB_CHACHA tristate help @@ -42,7 +33,7 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_LIB_CHACHA_GENERIC tristate - select CRYPTO_ALGAPI + select XOR_BLOCKS help This symbol can be depended upon by arch implementations of the ChaCha library interface that require the generic code as a @@ -52,6 +43,7 @@ config CRYPTO_LIB_CHACHA_GENERIC config CRYPTO_LIB_CHACHA tristate "ChaCha library interface" + depends on CRYPTO depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n help @@ -79,6 +71,7 @@ config CRYPTO_LIB_CURVE25519 tristate "Curve25519 scalar multiplication library" depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519 select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n + select LIB_MEMNEQ help Enable the Curve25519 library interface. This interface may be fulfilled by either the generic implementation or an arch-specific @@ -123,8 +116,12 @@ config CRYPTO_LIB_CHACHA20POLY1305 tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)" depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 + depends on CRYPTO select CRYPTO_LIB_CHACHA select CRYPTO_LIB_POLY1305 + select CRYPTO_ALGAPI config CRYPTO_LIB_SHA256 tristate + +endmenu diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 3a435629d9ce..26be2bbe09c5 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -10,11 +10,10 @@ libaes-y := aes.o obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o libarc4-y := arc4.o -obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += libblake2s-generic.o -libblake2s-generic-y += blake2s-generic.o - -obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o -libblake2s-y += blake2s.o +# blake2s is used by the /dev/random driver which is always builtin +obj-y += libblake2s.o +libblake2s-y := blake2s.o +libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o libchacha20poly1305-y += chacha20poly1305.o diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c index 04ff8df24513..75ccb3e633e6 100644 --- a/lib/crypto/blake2s-generic.c +++ b/lib/crypto/blake2s-generic.c @@ -37,7 +37,11 @@ static inline void blake2s_increment_counter(struct blake2s_state *state, state->t[1] += (state->t[0] < inc); } -void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) + __weak __alias(blake2s_compress_generic); + +void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, size_t nblocks, const u32 inc) { u32 m[16]; diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 5d9ea53be973..409e4b728770 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -15,7 +15,6 @@ * #include * * #include - * #include * * #define BLAKE2S_TESTVEC_COUNT 256 * @@ -58,16 +57,6 @@ * } * printf("};\n\n"); * - * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); - * - * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL); - * print_vec(hash, BLAKE2S_OUTBYTES); - * - * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL); - * print_vec(hash, BLAKE2S_OUTBYTES); - * - * printf("};\n"); - * * return 0; *} */ @@ -554,15 +543,6 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, }; -static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { - { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70, - 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79, - 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, }, - { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9, - 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f, - 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, }, -}; - bool __init blake2s_selftest(void) { u8 key[BLAKE2S_KEY_SIZE]; @@ -607,16 +587,5 @@ bool __init blake2s_selftest(void) } } - if (success) { - blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key)); - success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE); - - blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf)); - success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE); - - if (!success) - pr_err("blake2s256_hmac self-test: FAIL\n"); - } - return success; } diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index c64ac8bfb6a9..80b194f9a0a0 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -16,63 +16,20 @@ #include #include -#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S) -# define blake2s_compress blake2s_compress_arch -#else -# define blake2s_compress blake2s_compress_generic -#endif - void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { - __blake2s_update(state, in, inlen, blake2s_compress); + __blake2s_update(state, in, inlen, false); } EXPORT_SYMBOL(blake2s_update); void blake2s_final(struct blake2s_state *state, u8 *out) { WARN_ON(IS_ENABLED(DEBUG) && !out); - __blake2s_final(state, out, blake2s_compress); + __blake2s_final(state, out, false); memzero_explicit(state, sizeof(*state)); } EXPORT_SYMBOL(blake2s_final); -void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, - const size_t keylen) -{ - struct blake2s_state state; - u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; - u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); - int i; - - if (keylen > BLAKE2S_BLOCK_SIZE) { - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, key, keylen); - blake2s_final(&state, x_key); - } else - memcpy(x_key, key, keylen); - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) - x_key[i] ^= 0x36; - - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); - blake2s_update(&state, in, inlen); - blake2s_final(&state, i_hash); - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) - x_key[i] ^= 0x5c ^ 0x36; - - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); - blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); - blake2s_final(&state, i_hash); - - memcpy(out, i_hash, BLAKE2S_HASH_SIZE); - memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); - memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); -} -EXPORT_SYMBOL(blake2s256_hmac); - static int __init mod_init(void) { if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && diff --git a/lib/idr.c b/lib/idr.c index f4ab4f4aa3c7..7ecdfdb5309e 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -491,7 +491,8 @@ void ida_free(struct ida *ida, unsigned int id) struct ida_bitmap *bitmap; unsigned long flags; - BUG_ON((int)id < 0); + if ((int)id < 0) + return; xas_lock_irqsave(&xas, flags); bitmap = xas_load(&xas); diff --git a/crypto/memneq.c b/lib/memneq.c similarity index 100% rename from crypto/memneq.c rename to lib/memneq.c diff --git a/lib/nodemask.c b/lib/nodemask.c index 3aa454c54c0d..e22647f5181b 100644 --- a/lib/nodemask.c +++ b/lib/nodemask.c @@ -3,9 +3,9 @@ #include #include -int __next_node_in(int node, const nodemask_t *srcp) +unsigned int __next_node_in(int node, const nodemask_t *srcp) { - int ret = __next_node(node, srcp); + unsigned int ret = __next_node(node, srcp); if (ret == MAX_NUMNODES) ret = __first_node(srcp); diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index e59eda07305e..493093b97093 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -75,6 +75,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, data = kzalloc(sizeof(*ref->data), gfp); if (!data) { free_percpu((void __percpu *)ref->percpu_count_ptr); + ref->percpu_count_ptr = 0; return -ENOMEM; } diff --git a/lib/random32.c b/lib/random32.c index 4d0e05e471d7..f0ab17c2244b 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,8 +39,9 @@ #include #include #include +#include +#include #include -#include /** * prandom_u32_state - seeded pseudo-random number generator. @@ -386,7 +387,6 @@ u32 prandom_u32(void) struct siprand_state *state = get_cpu_ptr(&net_rand_state); u32 res = siprand_u32(state); - trace_prandom_u32(res); put_cpu_ptr(&net_rand_state); return res; } @@ -552,9 +552,11 @@ static void prandom_reseed(struct timer_list *unused) * To avoid worrying about whether it's safe to delay that interrupt * long enough to seed all CPUs, just schedule an immediate timer event. */ -static void prandom_timer_start(struct random_ready_callback *unused) +static int prandom_timer_start(struct notifier_block *nb, + unsigned long action, void *data) { mod_timer(&seed_timer, jiffies); + return 0; } #ifdef CONFIG_RANDOM32_SELFTEST @@ -618,13 +620,13 @@ core_initcall(prandom32_state_selftest); */ static int __init prandom_init_late(void) { - static struct random_ready_callback random_ready = { - .func = prandom_timer_start + static struct notifier_block random_ready = { + .notifier_call = prandom_timer_start }; - int ret = add_random_ready_callback(&random_ready); + int ret = register_random_ready_notifier(&random_ready); if (ret == -EALREADY) { - prandom_timer_start(&random_ready); + prandom_timer_start(&random_ready, 0, NULL); ret = 0; } return ret; diff --git a/lib/sha1.c b/lib/sha1.c index 49257a915bb6..5ad4e4948272 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,8 @@ #define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ __u32 TEMP = input(t); setW(t, TEMP); \ E += TEMP + rol32(A,5) + (fn) + (constant); \ - B = ror32(B, 2); } while (0) + B = ror32(B, 2); \ + TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0) #define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) #define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) @@ -84,6 +86,7 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array) { __u32 A, B, C, D, E; + unsigned int i = 0; A = digest[0]; B = digest[1]; @@ -92,94 +95,24 @@ void sha1_transform(__u32 *digest, const char *data, __u32 *array) E = digest[4]; /* Round 1 - iterations 0-16 take their input from 'data' */ - T_0_15( 0, A, B, C, D, E); - T_0_15( 1, E, A, B, C, D); - T_0_15( 2, D, E, A, B, C); - T_0_15( 3, C, D, E, A, B); - T_0_15( 4, B, C, D, E, A); - T_0_15( 5, A, B, C, D, E); - T_0_15( 6, E, A, B, C, D); - T_0_15( 7, D, E, A, B, C); - T_0_15( 8, C, D, E, A, B); - T_0_15( 9, B, C, D, E, A); - T_0_15(10, A, B, C, D, E); - T_0_15(11, E, A, B, C, D); - T_0_15(12, D, E, A, B, C); - T_0_15(13, C, D, E, A, B); - T_0_15(14, B, C, D, E, A); - T_0_15(15, A, B, C, D, E); + for (; i < 16; ++i) + T_0_15(i, A, B, C, D, E); /* Round 1 - tail. Input from 512-bit mixing array */ - T_16_19(16, E, A, B, C, D); - T_16_19(17, D, E, A, B, C); - T_16_19(18, C, D, E, A, B); - T_16_19(19, B, C, D, E, A); + for (; i < 20; ++i) + T_16_19(i, A, B, C, D, E); /* Round 2 */ - T_20_39(20, A, B, C, D, E); - T_20_39(21, E, A, B, C, D); - T_20_39(22, D, E, A, B, C); - T_20_39(23, C, D, E, A, B); - T_20_39(24, B, C, D, E, A); - T_20_39(25, A, B, C, D, E); - T_20_39(26, E, A, B, C, D); - T_20_39(27, D, E, A, B, C); - T_20_39(28, C, D, E, A, B); - T_20_39(29, B, C, D, E, A); - T_20_39(30, A, B, C, D, E); - T_20_39(31, E, A, B, C, D); - T_20_39(32, D, E, A, B, C); - T_20_39(33, C, D, E, A, B); - T_20_39(34, B, C, D, E, A); - T_20_39(35, A, B, C, D, E); - T_20_39(36, E, A, B, C, D); - T_20_39(37, D, E, A, B, C); - T_20_39(38, C, D, E, A, B); - T_20_39(39, B, C, D, E, A); + for (; i < 40; ++i) + T_20_39(i, A, B, C, D, E); /* Round 3 */ - T_40_59(40, A, B, C, D, E); - T_40_59(41, E, A, B, C, D); - T_40_59(42, D, E, A, B, C); - T_40_59(43, C, D, E, A, B); - T_40_59(44, B, C, D, E, A); - T_40_59(45, A, B, C, D, E); - T_40_59(46, E, A, B, C, D); - T_40_59(47, D, E, A, B, C); - T_40_59(48, C, D, E, A, B); - T_40_59(49, B, C, D, E, A); - T_40_59(50, A, B, C, D, E); - T_40_59(51, E, A, B, C, D); - T_40_59(52, D, E, A, B, C); - T_40_59(53, C, D, E, A, B); - T_40_59(54, B, C, D, E, A); - T_40_59(55, A, B, C, D, E); - T_40_59(56, E, A, B, C, D); - T_40_59(57, D, E, A, B, C); - T_40_59(58, C, D, E, A, B); - T_40_59(59, B, C, D, E, A); + for (; i < 60; ++i) + T_40_59(i, A, B, C, D, E); /* Round 4 */ - T_60_79(60, A, B, C, D, E); - T_60_79(61, E, A, B, C, D); - T_60_79(62, D, E, A, B, C); - T_60_79(63, C, D, E, A, B); - T_60_79(64, B, C, D, E, A); - T_60_79(65, A, B, C, D, E); - T_60_79(66, E, A, B, C, D); - T_60_79(67, D, E, A, B, C); - T_60_79(68, C, D, E, A, B); - T_60_79(69, B, C, D, E, A); - T_60_79(70, A, B, C, D, E); - T_60_79(71, E, A, B, C, D); - T_60_79(72, D, E, A, B, C); - T_60_79(73, C, D, E, A, B); - T_60_79(74, B, C, D, E, A); - T_60_79(75, A, B, C, D, E); - T_60_79(76, E, A, B, C, D); - T_60_79(77, D, E, A, B, C); - T_60_79(78, C, D, E, A, B); - T_60_79(79, B, C, D, E, A); + for (; i < 80; ++i) + T_60_79(i, A, B, C, D, E); digest[0] += A; digest[1] += B; diff --git a/lib/siphash.c b/lib/siphash.c index 025f0cbf6d7a..b4055b1cc2f6 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -18,19 +18,13 @@ #include #endif -#define SIPROUND \ - do { \ - v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \ - v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \ - v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \ - v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \ - } while (0) +#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3) #define PREAMBLE(len) \ - u64 v0 = 0x736f6d6570736575ULL; \ - u64 v1 = 0x646f72616e646f6dULL; \ - u64 v2 = 0x6c7967656e657261ULL; \ - u64 v3 = 0x7465646279746573ULL; \ + u64 v0 = SIPHASH_CONST_0; \ + u64 v1 = SIPHASH_CONST_1; \ + u64 v2 = SIPHASH_CONST_2; \ + u64 v3 = SIPHASH_CONST_3; \ u64 b = ((u64)(len)) << 56; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ @@ -389,19 +383,13 @@ u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, } EXPORT_SYMBOL(hsiphash_4u32); #else -#define HSIPROUND \ - do { \ - v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \ - v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \ - v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \ - v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \ - } while (0) +#define HSIPROUND HSIPHASH_PERMUTATION(v0, v1, v2, v3) #define HPREAMBLE(len) \ - u32 v0 = 0; \ - u32 v1 = 0; \ - u32 v2 = 0x6c796765U; \ - u32 v3 = 0x74656462U; \ + u32 v0 = HSIPHASH_CONST_0; \ + u32 v1 = HSIPHASH_CONST_1; \ + u32 v2 = HSIPHASH_CONST_2; \ + u32 v3 = HSIPHASH_CONST_3; \ u32 b = ((u32)(len)) << 24; \ v3 ^= key->key[1]; \ v2 ^= key->key[0]; \ diff --git a/lib/vsprintf.c b/lib/vsprintf.c index ed2f709de0db..93293eb04df9 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -756,14 +756,16 @@ static void enable_ptr_key_workfn(struct work_struct *work) static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); -static void fill_random_ptr_key(struct random_ready_callback *unused) +static int fill_random_ptr_key(struct notifier_block *nb, + unsigned long action, void *data) { /* This may be in an interrupt handler. */ queue_work(system_unbound_wq, &enable_ptr_key_work); + return 0; } -static struct random_ready_callback random_ready = { - .func = fill_random_ptr_key +static struct notifier_block random_ready = { + .notifier_call = fill_random_ptr_key }; static int __init initialize_ptr_random(void) @@ -777,7 +779,7 @@ static int __init initialize_ptr_random(void) return 0; } - ret = add_random_ready_callback(&random_ready); + ret = register_random_ready_notifier(&random_ready); if (!ret) { return 0; } else if (ret == -EALREADY) { diff --git a/mm/compaction.c b/mm/compaction.c index 822e6221b0a9..9922a6f12e7d 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1772,6 +1772,8 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) update_fast_start_pfn(cc, free_pfn); pfn = pageblock_start_pfn(free_pfn); + if (pfn < cc->zone->zone_start_pfn) + pfn = cc->zone->zone_start_pfn; cc->fast_search_fail = 0; found_block = true; set_pageblock_skip(freepage); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fb145839f299..bf7efe630e76 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5505,7 +5505,14 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, pud_clear(pud); put_page(virt_to_page(ptep)); mm_dec_nr_pmds(mm); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; + /* + * This update of passed address optimizes loops sequentially + * processing addresses in increments of huge page size (PMD_SIZE + * in this case). By clearing the pud, a PUD_SIZE area is unmapped. + * Update address to the 'last page' in the cleared area so that + * calling loop can move to first page past this area. + */ + *addr |= PUD_SIZE - PMD_SIZE; return 1; } diff --git a/mm/memory.c b/mm/memory.c index b9083b1aba08..85554eca3da9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4609,6 +4609,19 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) static vm_fault_t create_huge_pud(struct vm_fault *vmf) { +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + /* No support for anonymous transparent PUD pages yet */ + if (vma_is_anonymous(vmf->vma)) + return VM_FAULT_FALLBACK; + if (vmf->vma->vm_ops->huge_fault) + return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + return VM_FAULT_FALLBACK; +} + +static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) +{ #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) /* No support for anonymous transparent PUD pages yet */ @@ -4623,19 +4636,7 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf) split: /* COW or write-notify not handled on PUD level: split pud.*/ __split_huge_pud(vmf->vma, vmf->pud, vmf->address); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - return VM_FAULT_FALLBACK; -} - -static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* No support for anonymous transparent PUD pages yet */ - if (vma_is_anonymous(vmf->vma)) - return VM_FAULT_FALLBACK; - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ return VM_FAULT_FALLBACK; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 274562c53f2c..c4c1c7bf51d7 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -374,7 +374,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol, */ static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) { - if (!pol) + if (!pol || pol->mode == MPOL_LOCAL) return; if (!mpol_store_user_nodemask(pol) && !(pol->flags & MPOL_F_LOCAL) && nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9bb27db5be15..222afa645639 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3837,11 +3837,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, * need to be calculated. */ if (!order) { - long fast_free; + long usable_free; + long reserved; - fast_free = free_pages; - fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags); - if (fast_free > mark + z->lowmem_reserve[highest_zoneidx]) + usable_free = free_pages; + reserved = __zone_watermark_unusable_free(z, 0, alloc_flags); + + /* reserved may over estimate high-atomic reserves. */ + usable_free -= min(usable_free, reserved); + if (usable_free > mark + z->lowmem_reserve[highest_zoneidx]) return true; } diff --git a/mm/slub.c b/mm/slub.c index 135d1be1c2ac..8817d0d0c8ea 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2351,6 +2351,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, c->page = NULL; c->freelist = NULL; + c->tid = next_tid(c->tid); } /* @@ -2484,8 +2485,6 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { stat(s, CPUSLAB_FLUSH); deactivate_slab(s, c->page, c->freelist, c); - - c->tid = next_tid(c->tid); } /* @@ -2771,6 +2770,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (!freelist) { c->page = NULL; + c->tid = next_tid(c->tid); stat(s, DEACTIVATE_BYPASS); goto new_slab; } diff --git a/mm/util.c b/mm/util.c index 8a859bd9d41f..f9075c5923e4 100644 --- a/mm/util.c +++ b/mm/util.c @@ -334,6 +334,38 @@ unsigned long randomize_stack_top(unsigned long stack_top) #endif } +/** + * randomize_page - Generate a random, page aligned address + * @start: The smallest acceptable address the caller will take. + * @range: The size of the area, starting at @start, within which the + * random address must fall. + * + * If @start + @range would overflow, @range is capped. + * + * NOTE: Historical use of randomize_range, which this replaces, presumed that + * @start was already page aligned. We now align it regardless. + * + * Return: A page aligned address within [start, start + range). On error, + * @start is returned. + */ +unsigned long randomize_page(unsigned long start, unsigned long range) +{ + if (!PAGE_ALIGNED(start)) { + range -= PAGE_ALIGN(start) - start; + start = PAGE_ALIGN(start); + } + + if (start > ULONG_MAX - range) + range = ULONG_MAX - start; + + range >>= PAGE_SHIFT; + + if (range == 0) + return start; + + return start + (get_random_long() % range << PAGE_SHIFT); +} + #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT unsigned long arch_randomize_brk(struct mm_struct *mm) { diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 6fce68d224b2..1b309c67d1d6 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1748,11 +1748,40 @@ static enum fullness_group putback_zspage(struct size_class *class, */ static void lock_zspage(struct zspage *zspage) { - struct page *page = get_first_page(zspage); + struct page *curr_page, *page; - do { - lock_page(page); - } while ((page = get_next_page(page)) != NULL); + /* + * Pages we haven't locked yet can be migrated off the list while we're + * trying to lock them, so we need to be careful and only attempt to + * lock each page under migrate_read_lock(). Otherwise, the page we lock + * may no longer belong to the zspage. This means that we may wait for + * the wrong page to unlock, so we must take a reference to the page + * prior to waiting for it to unlock outside migrate_read_lock(). + */ + while (1) { + migrate_read_lock(zspage); + page = get_first_page(zspage); + if (trylock_page(page)) + break; + get_page(page); + migrate_read_unlock(zspage); + wait_on_page_locked(page); + put_page(page); + } + + curr_page = page; + while ((page = get_next_page(curr_page))) { + if (trylock_page(page)) { + curr_page = page; + } else { + get_page(page); + migrate_read_unlock(zspage); + wait_on_page_locked(page); + put_page(page); + migrate_read_lock(zspage); + } + } + migrate_read_unlock(zspage); } static int zs_init_fs_context(struct fs_context *fc) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index d12c9a8a9953..64a94c9812da 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -278,9 +278,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - if (new_dev->reg_state == NETREG_UNINITIALIZED || - new_dev->reg_state == NETREG_UNREGISTERED) - free_netdev(new_dev); + free_netdev(new_dev); return err; } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 5fff027f25fa..a1f4cb836fcf 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1653,9 +1653,12 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; - struct sk_buff *skb; + struct sk_buff *skb, *last; + struct sk_buff_head *sk_queue; int copied; int err = 0; + int off = 0; + long timeo; lock_sock(sk); /* @@ -1667,11 +1670,29 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, goto out; } - /* Now we can treat all alike */ - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); - if (skb == NULL) - goto out; + /* We need support for non-blocking reads. */ + sk_queue = &sk->sk_receive_queue; + skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last); + /* If no packet is available, release_sock(sk) and try again. */ + if (!skb) { + if (err != -EAGAIN) + goto out; + release_sock(sk); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err, + &timeo, last)) { + skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, + &err, &last); + if (skb) + break; + + if (err != -EAGAIN) + goto done; + } + if (!skb) + goto done; + lock_sock(sk); + } if (!sk_to_ax25(sk)->pidincl) skb_pull(skb, 1); /* Remove PID */ @@ -1718,6 +1739,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, out: release_sock(sk); +done: return err; } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ecd2ffcf2ba2..140d9764c77e 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -240,7 +240,7 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason) { BT_DBG("hcon %p", conn); - /* When we are master of an established connection and it enters + /* When we are central of an established connection and it enters * the disconnect timeout, then go ahead and try to read the * current clock offset. Processing of the result is done * within the event handling and hci_clock_offset_evt function. @@ -1065,16 +1065,16 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, hci_req_init(&req, hdev); - /* Disable advertising if we're active. For master role + /* Disable advertising if we're active. For central role * connections most controllers will refuse to connect if - * advertising is enabled, and for slave role connections we + * advertising is enabled, and for peripheral role connections we * anyway have to disable it in order to start directed * advertising. */ if (hci_dev_test_flag(hdev, HCI_LE_ADV)) __hci_req_disable_advertising(&req); - /* If requested to connect as slave use directed advertising */ + /* If requested to connect as peripheral use directed advertising */ if (conn->role == HCI_ROLE_SLAVE) { /* If we're active scanning most controllers are unable * to initiate advertising. Simply reject the attempt. diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e926e80d9731..061ef20e135e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2759,9 +2759,9 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) bacpy(&cp.bdaddr, &ev->bdaddr); if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) - cp.role = 0x00; /* Become master */ + cp.role = 0x00; /* Become central */ else - cp.role = 0x01; /* Remain slave */ + cp.role = 0x01; /* Remain peripheral */ hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); } else if (!(flags & HCI_PROTO_DEFER)) { @@ -5153,7 +5153,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->dst_type = bdaddr_type; /* If we didn't have a hci_conn object previously - * but we're in master role this must be something + * but we're in central role this must be something * initiated using a white list. Since white list based * connections are not "first class citizens" we don't * have full tracking of them. Therefore, we go ahead diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index ee7ae88d4557..a21df117edff 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -796,6 +796,10 @@ static u8 update_white_list(struct hci_request *req) */ bool allow_rpa = hdev->suspended; + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + allow_rpa = true; + /* Go through the current white list programmed into the * controller one by one and check if that address is still * in the list of pending connections or list of devices to diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 6e8a9c4299b0..7b4752587363 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1711,8 +1711,8 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) if (hcon->out) smp_conn_security(hcon, hcon->pending_sec_level); - /* For LE slave connections, make sure the connection interval - * is in the range of the minium and maximum interval that has + /* For LE peripheral connections, make sure the connection interval + * is in the range of the minimum and maximum interval that has * been configured for this connection. If not, then trigger * the connection update procedure. */ @@ -7574,7 +7574,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, BT_DBG("chan %p, len %d", chan, skb->len); /* If we receive data on a fixed channel before the info req/rsp - * procdure is done simply assume that the channel is supported + * procedure is done simply assume that the channel is supported * and mark it as ready. */ if (chan->chan_type == L2CAP_CHAN_FIXED) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index f2bacb464ccf..7324764384b6 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -549,22 +549,58 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel) return dlc; } -int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) +static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag) { - int len = skb->len; - - if (d->state != BT_CONNECTED) - return -ENOTCONN; + int len = frag->len; BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len); if (len > d->mtu) return -EINVAL; - rfcomm_make_uih(skb, d->addr); - skb_queue_tail(&d->tx_queue, skb); + rfcomm_make_uih(frag, d->addr); + __skb_queue_tail(&d->tx_queue, frag); - if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags)) + return len; +} + +int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) +{ + unsigned long flags; + struct sk_buff *frag, *next; + int len; + + if (d->state != BT_CONNECTED) + return -ENOTCONN; + + frag = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + + /* Queue all fragments atomically. */ + spin_lock_irqsave(&d->tx_queue.lock, flags); + + len = rfcomm_dlc_send_frag(d, skb); + if (len < 0 || !frag) + goto unlock; + + for (; frag; frag = next) { + int ret; + + next = frag->next; + + ret = rfcomm_dlc_send_frag(d, frag); + if (ret < 0) { + kfree_skb(frag); + goto unlock; + } + + len += ret; + } + +unlock: + spin_unlock_irqrestore(&d->tx_queue.lock, flags); + + if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags)) rfcomm_schedule(); return len; } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ae6f80730561..4cf1fa9900ca 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -575,47 +575,21 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); sent = bt_sock_wait_ready(sk, msg->msg_flags); - if (sent) - goto done; - while (len) { - size_t size = min_t(size_t, len, d->mtu); - int err; - - skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) { - if (sent == 0) - sent = err; - break; - } - skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err) { - kfree_skb(skb); - if (sent == 0) - sent = err; - break; - } - - skb->priority = sk->sk_priority; - - err = rfcomm_dlc_send(d, skb); - if (err < 0) { - kfree_skb(skb); - if (sent == 0) - sent = err; - break; - } - - sent += size; - len -= size; - } - -done: release_sock(sk); + if (sent) + return sent; + + skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE, + RFCOMM_SKB_TAIL_RESERVE); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + sent = rfcomm_dlc_send(d, skb); + if (sent < 0) + kfree_skb(skb); + return sent; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 2f2b8ddc4dd5..8244d3ae185b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -280,12 +280,10 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) return err; } -static int sco_send_frame(struct sock *sk, void *buf, int len, - unsigned int msg_flags) +static int sco_send_frame(struct sock *sk, struct sk_buff *skb) { struct sco_conn *conn = sco_pi(sk)->conn; - struct sk_buff *skb; - int err; + int len = skb->len; /* Check outgoing MTU */ if (len > conn->mtu) @@ -293,11 +291,6 @@ static int sco_send_frame(struct sock *sk, void *buf, int len, BT_DBG("sk %p len %d", sk, len); - skb = bt_skb_send_alloc(sk, len, msg_flags & MSG_DONTWAIT, &err); - if (!skb) - return err; - - memcpy(skb_put(skb, len), buf, len); hci_send_sco(conn->hcon, skb); return len; @@ -575,19 +568,24 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen addr->sa_family != AF_BLUETOOTH) return -EINVAL; - if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) - return -EBADFD; + lock_sock(sk); + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } - if (sk->sk_type != SOCK_SEQPACKET) - return -EINVAL; + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + goto done; + } hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR); - if (!hdev) - return -EHOSTUNREACH; + if (!hdev) { + err = -EHOSTUNREACH; + goto done; + } hci_dev_lock(hdev); - lock_sock(sk); - /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); @@ -722,7 +720,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; - void *buf; + struct sk_buff *skb; int err; BT_DBG("sock %p, sk %p", sock, sk); @@ -734,24 +732,21 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; - buf = kmalloc(len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (memcpy_from_msg(buf, msg, len)) { - kfree(buf); - return -EFAULT; - } + skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0); + if (IS_ERR(skb)) + return PTR_ERR(skb); lock_sock(sk); if (sk->sk_state == BT_CONNECTED) - err = sco_send_frame(sk, buf, len, msg->msg_flags); + err = sco_send_frame(sk, skb); else err = -ENOTCONN; release_sock(sk); - kfree(buf); + + if (err < 0) + kfree_skb(skb); return err; } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 2b7879afc333..b7374dbee23a 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -909,8 +909,8 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, hcon->pending_sec_level = BT_SECURITY_HIGH; } - /* If both devices have Keyoard-Display I/O, the master - * Confirms and the slave Enters the passkey. + /* If both devices have Keyboard-Display I/O, the initiator + * Confirms and the responder Enters the passkey. */ if (smp->method == OVERLAP) { if (hcon->role == HCI_ROLE_MASTER) @@ -3076,7 +3076,7 @@ static void bredr_pairing(struct l2cap_chan *chan) if (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags)) return; - /* Only master may initiate SMP over BR/EDR */ + /* Only initiator may initiate SMP over BR/EDR */ if (hcon->role != HCI_ROLE_MASTER) return; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index eb684f31fd69..f8b231bbbe38 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -10,20 +10,86 @@ #include #include #include +#include #include #include +#include #define CREATE_TRACE_POINTS #include +struct bpf_test_timer { + enum { NO_PREEMPT, NO_MIGRATE } mode; + u32 i; + u64 time_start, time_spent; +}; + +static void bpf_test_timer_enter(struct bpf_test_timer *t) + __acquires(rcu) +{ + rcu_read_lock(); + if (t->mode == NO_PREEMPT) + preempt_disable(); + else + migrate_disable(); + + t->time_start = ktime_get_ns(); +} + +static void bpf_test_timer_leave(struct bpf_test_timer *t) + __releases(rcu) +{ + t->time_start = 0; + + if (t->mode == NO_PREEMPT) + preempt_enable(); + else + migrate_enable(); + rcu_read_unlock(); +} + +static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration) + __must_hold(rcu) +{ + t->i++; + if (t->i >= repeat) { + /* We're done. */ + t->time_spent += ktime_get_ns() - t->time_start; + do_div(t->time_spent, t->i); + *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent; + *err = 0; + goto reset; + } + + if (signal_pending(current)) { + /* During iteration: we've been cancelled, abort. */ + *err = -EINTR; + goto reset; + } + + if (need_resched()) { + /* During iteration: we need to reschedule between runs. */ + t->time_spent += ktime_get_ns() - t->time_start; + bpf_test_timer_leave(t); + cond_resched(); + bpf_test_timer_enter(t); + } + + /* Do another round. */ + return true; + +reset: + t->i = 0; + return false; +} + static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time, bool xdp) { struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; + struct bpf_test_timer t = { NO_MIGRATE }; enum bpf_cgroup_storage_type stype; - u64 time_start, time_spent = 0; - int ret = 0; - u32 i; + int ret; for_each_cgroup_storage_type(stype) { storage[stype] = bpf_cgroup_storage_alloc(prog, stype); @@ -38,10 +104,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, if (!repeat) repeat = 1; - rcu_read_lock(); - migrate_disable(); - time_start = ktime_get_ns(); - for (i = 0; i < repeat; i++) { + bpf_test_timer_enter(&t); + do { ret = bpf_cgroup_storage_set(storage); if (ret) break; @@ -53,29 +117,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, bpf_cgroup_storage_unset(); - if (signal_pending(current)) { - ret = -EINTR; - break; - } - - if (need_resched()) { - time_spent += ktime_get_ns() - time_start; - migrate_enable(); - rcu_read_unlock(); - - cond_resched(); - - rcu_read_lock(); - migrate_disable(); - time_start = ktime_get_ns(); - } - } - time_spent += ktime_get_ns() - time_start; - migrate_enable(); - rcu_read_unlock(); - - do_div(time_spent, repeat); - *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + } while (bpf_test_timer_continue(&t, repeat, &ret, time)); + bpf_test_timer_leave(&t); for_each_cgroup_storage_type(stype) bpf_cgroup_storage_free(storage[stype]); @@ -688,18 +731,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { + struct bpf_test_timer t = { NO_PREEMPT }; u32 size = kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; struct bpf_flow_keys *user_ctx; struct bpf_flow_keys flow_keys; - u64 time_start, time_spent = 0; const struct ethhdr *eth; unsigned int flags = 0; u32 retval, duration; void *data; int ret; - u32 i; if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) return -EINVAL; @@ -735,39 +777,15 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, ctx.data = data; ctx.data_end = (__u8 *)data + size; - rcu_read_lock(); - preempt_disable(); - time_start = ktime_get_ns(); - for (i = 0; i < repeat; i++) { + bpf_test_timer_enter(&t); + do { retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, size, flags); + } while (bpf_test_timer_continue(&t, repeat, &ret, &duration)); + bpf_test_timer_leave(&t); - if (signal_pending(current)) { - preempt_enable(); - rcu_read_unlock(); - - ret = -EINTR; - goto out; - } - - if (need_resched()) { - time_spent += ktime_get_ns() - time_start; - preempt_enable(); - rcu_read_unlock(); - - cond_resched(); - - rcu_read_lock(); - preempt_disable(); - time_start = ktime_get_ns(); - } - } - time_spent += ktime_get_ns() - time_start; - preempt_enable(); - rcu_read_unlock(); - - do_div(time_spent, repeat); - duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + if (ret < 0) + goto out; ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), retval, duration); @@ -780,3 +798,106 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, kfree(data); return ret; } + +int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_prog_array *progs = NULL; + struct bpf_sk_lookup_kern ctx = {}; + u32 repeat = kattr->test.repeat; + struct bpf_sk_lookup *user_ctx; + u32 retval, duration; + int ret = -EINVAL; + + if (prog->type != BPF_PROG_TYPE_SK_LOOKUP) + return -EINVAL; + + if (kattr->test.flags || kattr->test.cpu) + return -EINVAL; + + if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out || + kattr->test.data_size_out) + return -EINVAL; + + if (!repeat) + repeat = 1; + + user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx)); + if (IS_ERR(user_ctx)) + return PTR_ERR(user_ctx); + + if (!user_ctx) + return -EINVAL; + + if (user_ctx->sk) + goto out; + + if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) + goto out; + + if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) { + ret = -ERANGE; + goto out; + } + + ctx.family = (u16)user_ctx->family; + ctx.protocol = (u16)user_ctx->protocol; + ctx.dport = (u16)user_ctx->local_port; + ctx.sport = (__force __be16)user_ctx->remote_port; + + switch (ctx.family) { + case AF_INET: + ctx.v4.daddr = (__force __be32)user_ctx->local_ip4; + ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4; + break; + +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6; + ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6; + break; +#endif + + default: + ret = -EAFNOSUPPORT; + goto out; + } + + progs = bpf_prog_array_alloc(1, GFP_KERNEL); + if (!progs) { + ret = -ENOMEM; + goto out; + } + + progs->items[0].prog = prog; + + bpf_test_timer_enter(&t); + do { + ctx.selected_sk = NULL; + retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN); + } while (bpf_test_timer_continue(&t, repeat, &ret, &duration)); + bpf_test_timer_leave(&t); + + if (ret < 0) + goto out; + + user_ctx->cookie = 0; + if (ctx.selected_sk) { + if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) { + ret = -EOPNOTSUPP; + goto out; + } + + user_ctx->cookie = sock_gen_cookie(ctx.selected_sk); + } + + ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx)); + +out: + bpf_prog_array_free(progs); + kfree(user_ctx); + return ret; +} diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 59a318b9f646..bf5bf148091f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -43,6 +43,13 @@ static int br_pass_frame_up(struct sk_buff *skb) u64_stats_update_end(&brstats->syncp); vg = br_vlan_group_rcu(br); + + /* Reset the offload_fwd_mark because there could be a stacked + * bridge above, and it should not think this bridge it doing + * that bridge's work forwarding out its ports. + */ + br_switchdev_frame_unmark(skb); + /* Bridge is just like any other port. Make sure the * packet is allowed except in promisc modue when someone * may be running packet capture. diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 68c0d0f92890..10a2c7bca719 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1012,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); - for (i = 0; i < e->num_hook_entries && - ops[i]->priority <= NF_BR_PRI_BRNF; i++) - ; + for (i = 0; i < e->num_hook_entries; i++) { + /* These hooks have already been called */ + if (ops[i]->priority < NF_BR_PRI_BRNF) + continue; + + /* These hooks have not been called yet, run them. */ + if (ops[i]->priority > NF_BR_PRI_BRNF) + break; + + /* take a closer look at NF_BR_PRI_BRNF. */ + if (ops[i]->hook == br_nf_pre_routing) { + /* This hook diverted the skb to this function, + * hooks after this have not been run yet. + */ + i++; + break; + } + } nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 8e8ffac037cd..97805ec424c1 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -87,9 +87,8 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, return nft_meta_get_init(ctx, expr, tb); } - priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static struct nft_expr_type nft_meta_bridge_type; diff --git a/net/can/bcm.c b/net/can/bcm.c index 0928a39c4423..e918a0f3cda2 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -100,6 +100,7 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset) struct bcm_op { struct list_head list; + struct rcu_head rcu; int ifindex; canid_t can_id; u32 flags; @@ -718,10 +719,9 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, return NULL; } -static void bcm_remove_op(struct bcm_op *op) +static void bcm_free_op_rcu(struct rcu_head *rcu_head) { - hrtimer_cancel(&op->timer); - hrtimer_cancel(&op->thrtimer); + struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu); if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -732,6 +732,14 @@ static void bcm_remove_op(struct bcm_op *op) kfree(op); } +static void bcm_remove_op(struct bcm_op *op) +{ + hrtimer_cancel(&op->timer); + hrtimer_cancel(&op->thrtimer); + + call_rcu(&op->rcu, bcm_free_op_rcu); +} + static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) { if (op->rx_reg_dev == dev) { @@ -757,6 +765,9 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { + /* disable automatic timer on frame reception */ + op->flags |= RX_NO_AUTOTIMER; + /* * Don't care if we're bound or not (due to netdev * problems) can_rx_unregister() is always a save @@ -785,7 +796,6 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, bcm_rx_handler, op); list_del(&op->list); - synchronize_rcu(); bcm_remove_op(op); return 1; /* done */ } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7901ab6c79fd..1e9fab79e245 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req) target_init(&req->r_t); } -/* - * This is ugly, but it allows us to reuse linger registration and ping - * requests, keeping the structure of the code around send_linger{_ping}() - * reasonable. Setting up a min_nr=2 mempool for each linger request - * and dealing with copying ops (this blasts req only, watch op remains - * intact) isn't any better. - */ -static void request_reinit(struct ceph_osd_request *req) -{ - struct ceph_osd_client *osdc = req->r_osdc; - bool mempool = req->r_mempool; - unsigned int num_ops = req->r_num_ops; - u64 snapid = req->r_snapid; - struct ceph_snap_context *snapc = req->r_snapc; - bool linger = req->r_linger; - struct ceph_msg *request_msg = req->r_request; - struct ceph_msg *reply_msg = req->r_reply; - - dout("%s req %p\n", __func__, req); - WARN_ON(kref_read(&req->r_kref) != 1); - request_release_checks(req); - - WARN_ON(kref_read(&request_msg->kref) != 1); - WARN_ON(kref_read(&reply_msg->kref) != 1); - target_destroy(&req->r_t); - - request_init(req); - req->r_osdc = osdc; - req->r_mempool = mempool; - req->r_num_ops = num_ops; - req->r_snapid = snapid; - req->r_snapc = snapc; - req->r_linger = linger; - req->r_request = request_msg; - req->r_reply = reply_msg; -} - struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, unsigned int num_ops, @@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init); * @watch_opcode: CEPH_OSD_WATCH_OP_* */ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which, - u64 cookie, u8 watch_opcode) + u8 watch_opcode, u64 cookie, u32 gen) { struct ceph_osd_req_op *op; op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); op->watch.cookie = cookie; op->watch.op = watch_opcode; - op->watch.gen = 0; + op->watch.gen = gen; +} + +/* + * prot_ver, timeout and notify payload (may be empty) should already be + * encoded in @request_pl + */ +static void osd_req_op_notify_init(struct ceph_osd_request *req, int which, + u64 cookie, struct ceph_pagelist *request_pl) +{ + struct ceph_osd_req_op *op; + + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); + op->notify.cookie = cookie; + + ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl); + op->indata_len = request_pl->length; } /* @@ -2727,10 +2706,13 @@ static void linger_release(struct kref *kref) WARN_ON(!list_empty(&lreq->pending_lworks)); WARN_ON(lreq->osd); - if (lreq->reg_req) - ceph_osdc_put_request(lreq->reg_req); - if (lreq->ping_req) - ceph_osdc_put_request(lreq->ping_req); + if (lreq->request_pl) + ceph_pagelist_release(lreq->request_pl); + if (lreq->notify_id_pages) + ceph_release_page_vector(lreq->notify_id_pages, 1); + + ceph_osdc_put_request(lreq->reg_req); + ceph_osdc_put_request(lreq->ping_req); target_destroy(&lreq->t); kfree(lreq); } @@ -2999,6 +2981,12 @@ static void linger_commit_cb(struct ceph_osd_request *req) struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); + if (req != lreq->reg_req) { + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", + __func__, lreq, lreq->linger_id, req, lreq->reg_req); + goto out; + } + dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, lreq->linger_id, req->r_result); linger_reg_commit_complete(lreq, req->r_result); @@ -3022,6 +3010,7 @@ static void linger_commit_cb(struct ceph_osd_request *req) } } +out: mutex_unlock(&lreq->lock); linger_put(lreq); } @@ -3044,6 +3033,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req) struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); + if (req != lreq->reg_req) { + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", + __func__, lreq, lreq->linger_id, req, lreq->reg_req); + goto out; + } + dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__, lreq, lreq->linger_id, req->r_result, lreq->last_error); if (req->r_result < 0) { @@ -3053,46 +3048,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req) } } +out: mutex_unlock(&lreq->lock); linger_put(lreq); } static void send_linger(struct ceph_osd_linger_request *lreq) { - struct ceph_osd_request *req = lreq->reg_req; - struct ceph_osd_req_op *op = &req->r_ops[0]; + struct ceph_osd_client *osdc = lreq->osdc; + struct ceph_osd_request *req; + int ret; - verify_osdc_wrlocked(req->r_osdc); + verify_osdc_wrlocked(osdc); + mutex_lock(&lreq->lock); dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - if (req->r_osd) - cancel_linger_request(req); + if (lreq->reg_req) { + if (lreq->reg_req->r_osd) + cancel_linger_request(lreq->reg_req); + ceph_osdc_put_request(lreq->reg_req); + } + + req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); + BUG_ON(!req); - request_reinit(req); target_copy(&req->r_t, &lreq->t); req->r_mtime = lreq->mtime; - mutex_lock(&lreq->lock); if (lreq->is_watch && lreq->committed) { - WARN_ON(op->op != CEPH_OSD_OP_WATCH || - op->watch.cookie != lreq->linger_id); - op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT; - op->watch.gen = ++lreq->register_gen; + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT, + lreq->linger_id, ++lreq->register_gen); dout("lreq %p reconnect register_gen %u\n", lreq, - op->watch.gen); + req->r_ops[0].watch.gen); req->r_callback = linger_reconnect_cb; } else { - if (!lreq->is_watch) + if (lreq->is_watch) { + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH, + lreq->linger_id, 0); + } else { lreq->notify_id = 0; - else - WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH); + + refcount_inc(&lreq->request_pl->refcnt); + osd_req_op_notify_init(req, 0, lreq->linger_id, + lreq->request_pl); + ceph_osd_data_pages_init( + osd_req_op_data(req, 0, notify, response_data), + lreq->notify_id_pages, PAGE_SIZE, 0, false, false); + } dout("lreq %p register\n", lreq); req->r_callback = linger_commit_cb; } - mutex_unlock(&lreq->lock); + + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + BUG_ON(ret); req->r_priv = linger_get(lreq); req->r_linger = true; + lreq->reg_req = req; + mutex_unlock(&lreq->lock); submit_request(req, true); } @@ -3102,6 +3115,12 @@ static void linger_ping_cb(struct ceph_osd_request *req) struct ceph_osd_linger_request *lreq = req->r_priv; mutex_lock(&lreq->lock); + if (req != lreq->ping_req) { + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", + __func__, lreq, lreq->linger_id, req, lreq->ping_req); + goto out; + } + dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n", __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent, lreq->last_error); @@ -3117,6 +3136,7 @@ static void linger_ping_cb(struct ceph_osd_request *req) lreq->register_gen, req->r_ops[0].watch.gen); } +out: mutex_unlock(&lreq->lock); linger_put(lreq); } @@ -3124,8 +3144,8 @@ static void linger_ping_cb(struct ceph_osd_request *req) static void send_linger_ping(struct ceph_osd_linger_request *lreq) { struct ceph_osd_client *osdc = lreq->osdc; - struct ceph_osd_request *req = lreq->ping_req; - struct ceph_osd_req_op *op = &req->r_ops[0]; + struct ceph_osd_request *req; + int ret; if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { dout("%s PAUSERD\n", __func__); @@ -3137,19 +3157,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq) __func__, lreq, lreq->linger_id, lreq->ping_sent, lreq->register_gen); - if (req->r_osd) - cancel_linger_request(req); + if (lreq->ping_req) { + if (lreq->ping_req->r_osd) + cancel_linger_request(lreq->ping_req); + ceph_osdc_put_request(lreq->ping_req); + } + + req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); + BUG_ON(!req); - request_reinit(req); target_copy(&req->r_t, &lreq->t); - - WARN_ON(op->op != CEPH_OSD_OP_WATCH || - op->watch.cookie != lreq->linger_id || - op->watch.op != CEPH_OSD_WATCH_OP_PING); - op->watch.gen = lreq->register_gen; + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id, + lreq->register_gen); req->r_callback = linger_ping_cb; + + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + BUG_ON(ret); + req->r_priv = linger_get(lreq); req->r_linger = true; + lreq->ping_req = req; ceph_osdc_get_request(req); account_request(req); @@ -3165,12 +3192,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq) down_write(&osdc->lock); linger_register(lreq); - if (lreq->is_watch) { - lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id; - lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id; - } else { - lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id; - } calc_target(osdc, &lreq->t, false); osd = lookup_create_osd(osdc, lreq->t.osd, true); @@ -3202,9 +3223,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq) */ static void __linger_cancel(struct ceph_osd_linger_request *lreq) { - if (lreq->is_watch && lreq->ping_req->r_osd) + if (lreq->ping_req && lreq->ping_req->r_osd) cancel_linger_request(lreq->ping_req); - if (lreq->reg_req->r_osd) + if (lreq->reg_req && lreq->reg_req->r_osd) cancel_linger_request(lreq->reg_req); cancel_linger_map_check(lreq); unlink_linger(lreq->osd, lreq); @@ -4651,43 +4672,6 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) } EXPORT_SYMBOL(ceph_osdc_sync); -static struct ceph_osd_request * -alloc_linger_request(struct ceph_osd_linger_request *lreq) -{ - struct ceph_osd_request *req; - - req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO); - if (!req) - return NULL; - - ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); - ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); - return req; -} - -static struct ceph_osd_request * -alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode) -{ - struct ceph_osd_request *req; - - req = alloc_linger_request(lreq); - if (!req) - return NULL; - - /* - * Pass 0 for cookie because we don't know it yet, it will be - * filled in by linger_submit(). - */ - osd_req_op_watch_init(req, 0, 0, watch_opcode); - - if (ceph_osdc_alloc_messages(req, GFP_NOIO)) { - ceph_osdc_put_request(req); - return NULL; - } - - return req; -} - /* * Returns a handle, caller owns a ref. */ @@ -4717,18 +4701,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, lreq->t.flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts64(&lreq->mtime); - lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH); - if (!lreq->reg_req) { - ret = -ENOMEM; - goto err_put_lreq; - } - - lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING); - if (!lreq->ping_req) { - ret = -ENOMEM; - goto err_put_lreq; - } - linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (ret) { @@ -4766,8 +4738,8 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts64(&req->r_mtime); - osd_req_op_watch_init(req, 0, lreq->linger_id, - CEPH_OSD_WATCH_OP_UNWATCH); + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH, + lreq->linger_id, 0); ret = ceph_osdc_alloc_messages(req, GFP_NOIO); if (ret) @@ -4853,35 +4825,6 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, } EXPORT_SYMBOL(ceph_osdc_notify_ack); -static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, - u64 cookie, u32 prot_ver, u32 timeout, - void *payload, u32 payload_len) -{ - struct ceph_osd_req_op *op; - struct ceph_pagelist *pl; - int ret; - - op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); - op->notify.cookie = cookie; - - pl = ceph_pagelist_alloc(GFP_NOIO); - if (!pl) - return -ENOMEM; - - ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */ - ret |= ceph_pagelist_encode_32(pl, timeout); - ret |= ceph_pagelist_encode_32(pl, payload_len); - ret |= ceph_pagelist_append(pl, payload, payload_len); - if (ret) { - ceph_pagelist_release(pl); - return -ENOMEM; - } - - ceph_osd_data_pagelist_init(&op->notify.request_data, pl); - op->indata_len = pl->length; - return 0; -} - /* * @timeout: in seconds * @@ -4900,7 +4843,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, size_t *preply_len) { struct ceph_osd_linger_request *lreq; - struct page **pages; int ret; WARN_ON(!timeout); @@ -4913,6 +4855,29 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, if (!lreq) return -ENOMEM; + lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO); + if (!lreq->request_pl) { + ret = -ENOMEM; + goto out_put_lreq; + } + + ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */ + ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout); + ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len); + ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len); + if (ret) { + ret = -ENOMEM; + goto out_put_lreq; + } + + /* for notify_id */ + lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO); + if (IS_ERR(lreq->notify_id_pages)) { + ret = PTR_ERR(lreq->notify_id_pages); + lreq->notify_id_pages = NULL; + goto out_put_lreq; + } + lreq->preply_pages = preply_pages; lreq->preply_len = preply_len; @@ -4920,35 +4885,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_READ; - lreq->reg_req = alloc_linger_request(lreq); - if (!lreq->reg_req) { - ret = -ENOMEM; - goto out_put_lreq; - } - - /* - * Pass 0 for cookie because we don't know it yet, it will be - * filled in by linger_submit(). - */ - ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout, - payload, payload_len); - if (ret) - goto out_put_lreq; - - /* for notify_id */ - pages = ceph_alloc_page_vector(1, GFP_NOIO); - if (IS_ERR(pages)) { - ret = PTR_ERR(pages); - goto out_put_lreq; - } - ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify, - response_data), - pages, PAGE_SIZE, 0, false, true); - - ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO); - if (ret) - goto out_put_lreq; - linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (!ret) diff --git a/net/core/dev.c b/net/core/dev.c index a6342c3b8afb..7abd12dabbd4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3246,11 +3246,15 @@ int skb_checksum_help(struct sk_buff *skb) } offset = skb_checksum_start_offset(skb); - BUG_ON(offset >= skb_headlen(skb)); + ret = -EINVAL; + if (WARN_ON_ONCE(offset >= skb_headlen(skb))) + goto out; + csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; - BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); + if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) + goto out; ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); if (ret) @@ -5751,7 +5755,7 @@ static void flush_all_backlogs(void) } /* we can have in flight packet[s] on the cpus we are not flushing, - * synchronize_net() in rollback_registered_many() will take care of + * synchronize_net() in unregister_netdevice_many() will take care of * them */ for_each_cpu(cpu, &flush_cpus) @@ -9509,106 +9513,6 @@ static void net_set_todo(struct net_device *dev) dev_net(dev)->dev_unreg_count++; } -static void rollback_registered_many(struct list_head *head) -{ - struct net_device *dev, *tmp; - LIST_HEAD(close_head); - - BUG_ON(dev_boot_phase); - ASSERT_RTNL(); - - list_for_each_entry_safe(dev, tmp, head, unreg_list) { - /* Some devices call without registering - * for initialization unwind. Remove those - * devices and proceed with the remaining. - */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - pr_debug("unregister_netdevice: device %s/%p never was registered\n", - dev->name, dev); - - WARN_ON(1); - list_del(&dev->unreg_list); - continue; - } - dev->dismantle = true; - BUG_ON(dev->reg_state != NETREG_REGISTERED); - } - - /* If device is running, close it first. */ - list_for_each_entry(dev, head, unreg_list) - list_add_tail(&dev->close_list, &close_head); - dev_close_many(&close_head, true); - - list_for_each_entry(dev, head, unreg_list) { - /* And unlink it from device chain. */ - unlist_netdevice(dev); - - dev->reg_state = NETREG_UNREGISTERING; - } - flush_all_backlogs(); - - synchronize_net(); - - list_for_each_entry(dev, head, unreg_list) { - struct sk_buff *skb = NULL; - - /* Shutdown queueing discipline. */ - dev_shutdown(dev); - - dev_xdp_uninstall(dev); - - /* Notify protocols, that we are about to destroy - * this device. They should clean all the things. - */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - - if (!dev->rtnl_link_ops || - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, - GFP_KERNEL, NULL, 0); - - /* - * Flush the unicast and multicast chains - */ - dev_uc_flush(dev); - dev_mc_flush(dev); - - netdev_name_node_alt_flush(dev); - netdev_name_node_free(dev->name_node); - - if (dev->netdev_ops->ndo_uninit) - dev->netdev_ops->ndo_uninit(dev); - - if (skb) - rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); - - /* Notifier chain MUST detach us all upper devices. */ - WARN_ON(netdev_has_any_upper_dev(dev)); - WARN_ON(netdev_has_any_lower_dev(dev)); - - /* Remove entries from kobject tree */ - netdev_unregister_kobject(dev); -#ifdef CONFIG_XPS - /* Remove XPS queueing entries */ - netif_reset_xps_queues_gt(dev, 0); -#endif - } - - synchronize_net(); - - list_for_each_entry(dev, head, unreg_list) - dev_put(dev); -} - -static void rollback_registered(struct net_device *dev) -{ - LIST_HEAD(single); - - list_add(&dev->unreg_list, &single); - rollback_registered_many(&single); - list_del(&single); -} - static netdev_features_t netdev_sync_upper_features(struct net_device *lower, struct net_device *upper, netdev_features_t features) { @@ -10145,17 +10049,10 @@ int register_netdevice(struct net_device *dev) ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); if (ret) { - rollback_registered(dev); - rcu_barrier(); - - dev->reg_state = NETREG_UNREGISTERED; - /* We should put the kobject that hold in - * netdev_unregister_kobject(), otherwise - * the net device cannot be freed when - * driver calls free_netdev(), because the - * kobject is being hold. - */ - kobject_put(&dev->dev.kobj); + /* Expect explicit free_netdev() on failure */ + dev->needs_free_netdev = false; + unregister_netdevice_queue(dev, NULL); + goto out; } /* * Prevent userspace races by waiting until the network @@ -10684,6 +10581,17 @@ void free_netdev(struct net_device *dev) struct napi_struct *p, *n; might_sleep(); + + /* When called immediately after register_netdevice() failed the unwind + * handling may still be dismantling the device. Handle that case by + * deferring the free. + */ + if (dev->reg_state == NETREG_UNREGISTERING) { + ASSERT_RTNL(); + dev->needs_free_netdev = true; + return; + } + netif_free_tx_queues(dev); netif_free_rx_queues(dev); @@ -10750,9 +10658,10 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) if (head) { list_move_tail(&dev->unreg_list, head); } else { - rollback_registered(dev); - /* Finish processing unregister after unlock */ - net_set_todo(dev); + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + unregister_netdevice_many(&single); } } EXPORT_SYMBOL(unregister_netdevice_queue); @@ -10766,14 +10675,100 @@ EXPORT_SYMBOL(unregister_netdevice_queue); */ void unregister_netdevice_many(struct list_head *head) { - struct net_device *dev; + struct net_device *dev, *tmp; + LIST_HEAD(close_head); - if (!list_empty(head)) { - rollback_registered_many(head); - list_for_each_entry(dev, head, unreg_list) - net_set_todo(dev); - list_del(head); + BUG_ON(dev_boot_phase); + ASSERT_RTNL(); + + if (list_empty(head)) + return; + + list_for_each_entry_safe(dev, tmp, head, unreg_list) { + /* Some devices call without registering + * for initialization unwind. Remove those + * devices and proceed with the remaining. + */ + if (dev->reg_state == NETREG_UNINITIALIZED) { + pr_debug("unregister_netdevice: device %s/%p never was registered\n", + dev->name, dev); + + WARN_ON(1); + list_del(&dev->unreg_list); + continue; + } + dev->dismantle = true; + BUG_ON(dev->reg_state != NETREG_REGISTERED); } + + /* If device is running, close it first. */ + list_for_each_entry(dev, head, unreg_list) + list_add_tail(&dev->close_list, &close_head); + dev_close_many(&close_head, true); + + list_for_each_entry(dev, head, unreg_list) { + /* And unlink it from device chain. */ + unlist_netdevice(dev); + + dev->reg_state = NETREG_UNREGISTERING; + } + flush_all_backlogs(); + + synchronize_net(); + + list_for_each_entry(dev, head, unreg_list) { + struct sk_buff *skb = NULL; + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + dev_xdp_uninstall(dev); + + /* Notify protocols, that we are about to destroy + * this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, + GFP_KERNEL, NULL, 0); + + /* + * Flush the unicast and multicast chains + */ + dev_uc_flush(dev); + dev_mc_flush(dev); + + netdev_name_node_alt_flush(dev); + netdev_name_node_free(dev->name_node); + + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); + + if (skb) + rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); + + /* Notifier chain MUST detach us all upper devices. */ + WARN_ON(netdev_has_any_upper_dev(dev)); + WARN_ON(netdev_has_any_lower_dev(dev)); + + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); +#ifdef CONFIG_XPS + /* Remove XPS queueing entries */ + netif_reset_xps_queues_gt(dev, 0); +#endif + } + + synchronize_net(); + + list_for_each_entry(dev, head, unreg_list) { + dev_put(dev); + net_set_todo(dev); + } + + list_del(head); } EXPORT_SYMBOL(unregister_netdevice_many); diff --git a/net/core/filter.c b/net/core/filter.c index 7cb5946022eb..d8d04869d719 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1687,7 +1687,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) return -EINVAL; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; @@ -1722,7 +1722,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, { void *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely(offset > INT_MAX)) goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); @@ -5606,7 +5606,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len if (err) return err; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); @@ -5964,10 +5963,21 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, ifindex, proto, netns_id, flags); if (sk) { - sk = sk_to_full_sk(sk); - if (!sk_fullsock(sk)) { + struct sock *sk2 = sk_to_full_sk(sk); + + /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk + * sock refcnt is decremented to prevent a request_sock leak. + */ + if (!sk_fullsock(sk2)) + sk2 = NULL; + if (sk2 != sk) { sock_gen_put(sk); - return NULL; + /* Ensure there is no need to bump sk2 refcnt */ + if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { + WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); + return NULL; + } + sk = sk2; } } @@ -6001,10 +6011,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, flags); if (sk) { - sk = sk_to_full_sk(sk); - if (!sk_fullsock(sk)) { + struct sock *sk2 = sk_to_full_sk(sk); + + /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk + * sock refcnt is decremented to prevent a request_sock leak. + */ + if (!sk_fullsock(sk2)) + sk2 = NULL; + if (sk2 != sk) { sock_gen_put(sk); - return NULL; + /* Ensure there is no need to bump sk2 refcnt */ + if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { + WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); + return NULL; + } + sk = sk2; } } @@ -6468,7 +6489,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -EINVAL; if (!th->ack || th->rst || th->syn) @@ -6543,7 +6564,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -ENOENT; if (!th->syn || th->ack || th->fin || th->rst) @@ -10295,6 +10316,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, } const struct bpf_prog_ops sk_lookup_prog_ops = { + .test_run = bpf_prog_test_run_sk_lookup, }; const struct bpf_verifier_ops sk_lookup_verifier_ops = { diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index e3f0d5906811..8d958290b7d2 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -566,3 +566,9 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0; } EXPORT_SYMBOL(flow_indr_dev_setup_offload); + +bool flow_indr_dev_exists(void) +{ + return !list_empty(&flow_block_indr_dev_list); +} +EXPORT_SYMBOL(flow_indr_dev_exists); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 873081cda950..3c9c2d6e3b92 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3442,26 +3442,15 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev->ifindex = ifm->ifi_index; - if (ops->newlink) { + if (ops->newlink) err = ops->newlink(link_net ? : net, dev, tb, data, extack); - /* Drivers should call free_netdev() in ->destructor - * and unregister it on failure after registration - * so that device could be finally freed in rtnl_unlock. - */ - if (err < 0) { - /* If device is not registered at all, free it now */ - if (dev->reg_state == NETREG_UNINITIALIZED || - dev->reg_state == NETREG_UNREGISTERED) - free_netdev(dev); - goto out; - } - } else { + else err = register_netdevice(dev); - if (err < 0) { - free_netdev(dev); - goto out; - } + if (err < 0) { + free_netdev(dev); + goto out; } + err = rtnl_configure_link(dev, ifm); if (err < 0) goto out_unregister; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index b8a33c841846..189eea1372d5 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, .daddr = *(struct in6_addr *)daddr, }; - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); @@ -96,7 +96,7 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, } EXPORT_SYMBOL(secure_tcpv6_seq); -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, +u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) { const struct { @@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); @@ -146,7 +146,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, } EXPORT_SYMBOL_GPL(secure_tcp_seq); -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); return siphash_4u32((__force u32)saddr, (__force u32)daddr, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b0b6e6a4784e..2455b0c0e486 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -464,7 +464,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, .fl4_dport = dccp_hdr(skb)->dccph_sport, }; - security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 49f4034bf126..2be5c69824f9 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -203,7 +203,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req fl6.flowi6_oif = ireq->ir_iif; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = htons(ireq->ir_num); - security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); @@ -279,7 +279,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) fl6.flowi6_oif = inet6_iif(rxskb); fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; - security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(rxskb, flowi6_to_flowi_common(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. */ dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL); @@ -912,7 +912,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 482a4538e315..0494caae8d59 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -220,7 +220,7 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ - tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { @@ -338,7 +338,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, inet->hdrincl = 1; } - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -1249,7 +1249,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1304,7 +1304,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index ca217a6f488f..d4a4160159a9 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -240,7 +240,7 @@ static int cipso_v4_cache_check(const unsigned char *key, struct cipso_v4_map_cache_entry *prev_entry = NULL; u32 hash; - if (!cipso_v4_cache_enabled) + if (!READ_ONCE(cipso_v4_cache_enabled)) return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); @@ -297,13 +297,14 @@ static int cipso_v4_cache_check(const unsigned char *key, int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { + int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize); int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; u32 cipso_ptr_len; - if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0) return 0; cipso_ptr_len = cipso_ptr[1]; @@ -323,7 +324,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); - if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + if (cipso_v4_cache[bkt].size < bkt_size) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; } else { @@ -1200,7 +1201,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, /* This will send packets using the "optimized" format when * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ - if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) + if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 && + ret_val <= 10) tag_len = 14; else tag_len = 4 + ret_val; @@ -1604,7 +1606,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) * all the CIPSO validations here but it doesn't * really specify _exactly_ what we need to validate * ... so, just make it a sysctl tunable. */ - if (cipso_v4_rbm_strictvalid) { + if (READ_ONCE(cipso_v4_rbm_strictvalid)) { if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c8c7b76c3b2e..3824b7abecf7 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1229,7 +1229,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, nh->fib_nh_dev = in_dev->dev; dev_hold(nh->fib_nh_dev); - nh->fib_nh_scope = RT_SCOPE_HOST; + nh->fib_nh_scope = RT_SCOPE_LINK; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; @@ -1831,7 +1831,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (nexthop_is_blackhole(fi->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode) + if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) goto offload; } @@ -2232,7 +2232,7 @@ void fib_select_multipath(struct fib_result *res, int hash) } change_nexthops(fi) { - if (net->ipv4.sysctl_fib_multipath_use_neigh) { + if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { if (!fib_good_nh(nexthop_nh)) continue; if (!first) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ffc5332f1390..a28f525e2c47 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -497,7 +497,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= sysctl_fib_sync_mem) { + if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) { tnode_free_size = 0; synchronize_rcu(); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b71b836cc7d1..a1aacf5e75a6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -261,11 +261,12 @@ bool icmp_global_allow(void) spin_lock(&icmp_global.lock); delta = min_t(u32, now - icmp_global.stamp, HZ); if (delta >= HZ / 50) { - incr = sysctl_icmp_msgs_per_sec * delta / HZ ; + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; if (incr) WRITE_ONCE(icmp_global.stamp, now); } - credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); + credit = min_t(u32, icmp_global.credit + incr, + READ_ONCE(sysctl_icmp_msgs_burst)); if (credit) { /* We want to use a credit of one in average, but need to randomize * it for security reasons. @@ -289,7 +290,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return true; /* Limit if icmp type is enabled in ratemask. */ - if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask))) return true; return false; @@ -327,7 +328,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, vif = l3mdev_master_ifindex(dst->dev); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); - rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + rc = inet_peer_xrlim_allow(peer, + READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); if (peer) inet_putpeer(peer); out: @@ -447,7 +449,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); - security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) goto out_unlock; @@ -503,7 +505,7 @@ static struct rtable *icmp_route_lookup(struct net *net, route_lookup_dev = icmp_get_route_lookup_dev(skb_in); fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev); - security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); + security_skb_classify_flow(skb_in, flowi4_to_flowi_common(fl4)); rt = ip_route_output_key_hash(net, fl4, skb_in); if (IS_ERR(rt)) return rt; @@ -885,7 +887,7 @@ static bool icmp_unreach(struct sk_buff *skb) * values please see * Documentation/networking/ip-sysctl.rst */ - switch (net->ipv4.sysctl_ip_no_pmtu_disc) { + switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) { default: net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n", &iph->daddr); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index e7f4c365919f..46196b50c9d6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; - if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(pmc->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return skb; mtu = READ_ONCE(dev->mtu); @@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(pmc->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return 0; if (type == IGMP_HOST_LEAVE_MESSAGE) @@ -818,7 +820,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + in_dev->mr_ifc_count = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); igmp_ifc_start_timer(in_dev, 1); } @@ -913,7 +915,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return false; - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return false; rcu_read_lock(); @@ -999,7 +1002,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; /* RFC3376, 8.3. Query Response Interval: @@ -1038,7 +1041,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&im->lock); if (im->tm_running) @@ -1179,7 +1182,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1230,9 +1233,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) swap(im->tomb, pmc->tomb); swap(im->sources, pmc->sources); for (psf = im->sources; psf; psf = psf->sf_next) - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } else { - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } in_dev_put(pmc->interface); kfree_pmc(pmc); @@ -1289,7 +1294,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; reporter = im->reporter; @@ -1331,13 +1337,14 @@ static void igmp_group_added(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; if (in_dev->dead) return; - im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; + im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { spin_lock_bh(&im->lock); igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); @@ -1351,7 +1358,7 @@ static void igmp_group_added(struct ip_mc_list *im) * IN() to IN(A). */ if (im->sfmode == MCAST_EXCLUDE) - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); igmp_ifc_event(in_dev); #endif @@ -1635,7 +1642,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; /* a failover is happening and switches @@ -1742,7 +1749,7 @@ static void ip_mc_reset(struct in_device *in_dev) in_dev->mr_qi = IGMP_QUERY_INTERVAL; in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); } #else static void ip_mc_reset(struct in_device *in_dev) @@ -1876,7 +1883,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1940,7 +1947,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2119,7 +2126,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2185,7 +2192,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, count++; } err = -ENOBUFS; - if (count >= net->ipv4.sysctl_igmp_max_memberships) + if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships)) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (!iml) @@ -2372,7 +2379,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } /* else, add a new source to the filter */ - if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) { + if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { err = -ENOBUFS; goto done; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1dfa561e8f98..c4303cbc9dc0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -251,7 +251,7 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int * goto other_half_scan; } - if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) { /* We still have a chance to connect to different destinations */ relax = true; goto ports_exhausted; @@ -602,7 +602,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, htons(ireq->ir_num), sk->sk_uid); - security_req_classify_flow(req, flowi4_to_flowi(fl4)); + security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; @@ -640,7 +640,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, htons(ireq->ir_num), sk->sk_uid); - security_req_classify_flow(req, flowi4_to_flowi(fl4)); + security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index a17315622143..e3d9d6700f44 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -504,7 +504,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; } -static u32 inet_sk_port_offset(const struct sock *sk) +static u64 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -722,8 +722,21 @@ void inet_unhash(struct sock *sk) } EXPORT_SYMBOL_GPL(inet_unhash); +/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm + * Note that we use 32bit integers (vs RFC 'short integers') + * because 2^16 is not a multiple of num_ephemeral and this + * property might be used by clever attacker. + * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though + * attacks were since demonstrated, thus we use 65536 instead to really + * give more isolation and privacy, at the expense of 256kB of kernel + * memory. + */ +#define INET_TABLE_PERTURB_SHIFT 16 +#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) +static u32 *table_perturb; + int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)) { @@ -735,8 +748,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct inet_bind_bucket *tb; u32 remaining, offset; int ret, i, low, high; - static u32 hint; int l3mdev; + u32 index; if (port) { head = &hinfo->bhash[inet_bhashfn(net, port, @@ -763,7 +776,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; - offset = (hint + port_offset) % remaining; + net_get_random_once(table_perturb, + INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); + index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); + + offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); + offset %= remaining; + /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. */ @@ -817,7 +836,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; ok: - hint += i + 2; + /* Here we want to add a little bit of randomness to the next source + * port that will be chosen. We use a max() with a random here so that + * on low contention the randomness is maximal and on high contention + * it may be inexistent. + */ + i = max_t(int, i, (prandom_u32() & 7) * 2); + WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, port); @@ -840,7 +865,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); @@ -890,6 +915,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, low_limit, high_limit); init_hashinfo_lhash2(h); + + /* this one is used for source ports of outgoing connections */ + table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE, + sizeof(*table_perturb), GFP_KERNEL); + if (!table_perturb) + panic("TCP: failed to alloc table_perturb"); } int inet_hashinfo2_init_mod(struct inet_hashinfo *h) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ff327a62c9ce..a18668552d33 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -148,16 +148,20 @@ static void inet_peer_gc(struct inet_peer_base *base, struct inet_peer *gc_stack[], unsigned int gc_cnt) { + int peer_threshold, peer_maxttl, peer_minttl; struct inet_peer *p; __u32 delta, ttl; int i; - if (base->total >= inet_peer_threshold) + peer_threshold = READ_ONCE(inet_peer_threshold); + peer_maxttl = READ_ONCE(inet_peer_maxttl); + peer_minttl = READ_ONCE(inet_peer_minttl); + + if (base->total >= peer_threshold) ttl = 0; /* be aggressive */ else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - base->total / inet_peer_threshold * HZ; + ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ * + base->total / peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 00ec819f949b..29730edda220 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -151,7 +151,7 @@ int ip_forward(struct sk_buff *skb) !skb_sec_path(skb)) ip_rt_send_redirect(skb); - if (net->ipv4.sysctl_ip_fwd_update_priority) + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority)) skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a80038575d2..6ab5c50aa7a8 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -519,7 +519,6 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) int tunnel_hlen; int version; int nhoff; - int thoff; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@ -553,10 +552,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; - thoff = skb_transport_header(skb) - skb_mac_header(skb); - if (skb->protocol == htons(ETH_P_IPV6) && - (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) - truncate = true; + if (skb->protocol == htons(ETH_P_IPV6)) { + int thoff; + + if (skb_transport_header_was_set(skb)) + thoff = skb_transport_header(skb) - skb_mac_header(skb); + else + thoff = nhoff + sizeof(struct ipv6hdr); + if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) + truncate = true; + } if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), @@ -624,21 +629,20 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, } if (dev->header_ops) { - const int pull_len = tunnel->hlen + sizeof(struct iphdr); - if (skb_cow_head(skb, 0)) goto free_skb; tnl_params = (const struct iphdr *)skb->data; - if (pull_len > skb_transport_offset(skb)) - goto free_skb; - /* Pull skb since ip_tunnel_xmit() needs skb->data pointing * to gre header. */ - skb_pull(skb, pull_len); + skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); skb_reset_mac_header(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start(skb) < skb->data) + goto free_skb; } else { if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5e48b3d3a00d..f77b0af3cb65 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1712,7 +1712,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, daddr, saddr, tcp_hdr(skb)->source, tcp_hdr(skb)->dest, arg->uid); - security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ec6036713e2c..22507a6a3f71 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -783,7 +783,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = -EINVAL; @@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; - if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, &gf32->gf_group, gf32->gf_slist); @@ -1242,7 +1242,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || - msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { + msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { kfree(msf); err = -ENOBUFS; break; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index e25be2d01a7a..4b74c67f13c9 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -410,7 +410,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, u32 mtu = dst_mtu(encap_dst) - headroom; if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) || - (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu)) + (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu)) return 0; skb_dst_update_pmtu_no_confirm(skb, mtu); diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index bcdb37f86a94..aeb631760eb9 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -13,8 +13,8 @@ #include struct nft_dup_ipv4 { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_dev:8; + u8 sreg_addr; + u8 sreg_dev; }; static void nft_dup_ipv4_eval(const struct nft_expr *expr, @@ -40,16 +40,16 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_ADDR] == NULL) return -EINVAL; - priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr)); + err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + sizeof(struct in_addr)); if (err < 0) return err; - if (tb[NFTA_DUP_SREG_DEV] != NULL) { - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); - } - return 0; + if (tb[NFTA_DUP_SREG_DEV]) + err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + &priv->sreg_dev, sizeof(int)); + + return err; } static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 4dbc628f8c38..11b66f8f2b70 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -882,7 +882,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); ipv6_stub->ip6_del_rt(net, f6i, - !net->ipv4.sysctl_nexthop_compat_mode); + !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); } } @@ -1173,7 +1173,8 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, if (!rc) { nh_base_seq_inc(net); nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); - if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode) + if (replace_notify && + READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)) nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2853a3f0fc63..1bad851b3fc3 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -796,7 +796,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4.fl4_icmp_type = user_icmph.type; fl4.fl4_icmp_code = user_icmph.code; - security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5d95f80314f9..4899ebe569eb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -640,7 +640,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto done; } - security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4080e3c6c50d..216f76b548ff 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1442,7 +1442,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) struct fib_info *fi = res->fi; u32 mtu = 0; - if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) mtu = fi->fib_mtu; @@ -1776,7 +1776,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, flags |= RTCF_LOCAL; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) return -ENOBUFS; @@ -1893,8 +1893,8 @@ static int __mkroute_input(struct sk_buff *skb, } rth = rt_dst_alloc(out_dev->dev, 0, res->type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM)); + IN_DEV_ORCONF(in_dev, NOPOLICY), + IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -2276,7 +2276,7 @@ out: return err; rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; @@ -2499,8 +2499,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, add: rth = rt_dst_alloc(dev_out, flags, type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(in_dev, NOXFRM)); + IN_DEV_ORCONF(in_dev, NOPOLICY), + IN_DEV_ORCONF(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0b616094e794..41afc9155f31 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -249,12 +249,12 @@ bool cookie_timestamp_decode(const struct net *net, return true; } - if (!net->ipv4.sysctl_tcp_timestamps) + if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) return false; tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0; - if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack) + if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) return false; if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) @@ -263,7 +263,7 @@ bool cookie_timestamp_decode(const struct net *net, tcp_opt->wscale_ok = 1; tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; - return net->ipv4.sysctl_tcp_window_scaling != 0; + return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } EXPORT_SYMBOL(cookie_timestamp_decode); @@ -342,7 +342,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct flowi4 fl4; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) @@ -424,7 +425,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); - security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { reqsk_free(req); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0c039e986372..c089723489f2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -95,7 +95,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, * port limit. */ if ((range[1] < range[0]) || - (range[0] < net->ipv4.sysctl_ip_prot_sock)) + (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock))) ret = -EINVAL; else set_local_port_range(net, range); @@ -121,7 +121,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, .extra2 = &ip_privileged_port_max, }; - pports = net->ipv4.sysctl_ip_prot_sock; + pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); @@ -133,7 +133,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, if (range[0] < pports) ret = -EINVAL; else - net->ipv4.sysctl_ip_prot_sock = pports; + WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports); } return ret; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ba28fc514fc1..e31eaa70edbb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -442,7 +442,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; + tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering); tcp_assign_congestion_control(sk); tp->tsoffset = 0; @@ -700,7 +700,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && - sock_net(sk)->ipv4.sysctl_tcp_autocorking && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && !tcp_rtx_queue_empty(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } @@ -1150,7 +1150,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) || (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; @@ -3369,7 +3370,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; - } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else @@ -3706,7 +3708,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; + val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 107111984384..39fb037ce5f3 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -349,7 +349,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk, const struct dst_entry *dst, int flag) { - return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || + return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) || tcp_sk(sk)->fastopen_no_cookie || (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); } @@ -364,7 +364,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, const struct dst_entry *dst) { bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; - int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; int ret = 0; @@ -506,7 +506,7 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); - if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)) return; /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ @@ -527,7 +527,8 @@ void tcp_fastopen_active_disable(struct sock *sk) */ bool tcp_fastopen_active_should_disable(struct sock *sk) { - unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; + unsigned int tfo_bh_timeout = + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout); unsigned long timeout; int tfo_da_times; int multiplier; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index afb546cf2c5d..ff2ab936b571 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -504,7 +504,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) */ static void tcp_init_buffer_space(struct sock *sk) { - int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; + int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win); struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -694,7 +694,7 @@ void tcp_rcv_space_adjust(struct sock *sk) * */ - if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvmem, rcvbuf; u64 rcvwin, grow; @@ -1012,7 +1012,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, tp->undo_marker ? tp->undo_retrans : 0); #endif tp->reordering = min_t(u32, (metric + mss - 1) / mss, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); } /* This exciting event is worth to be remembered. 8) */ @@ -1991,7 +1991,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) return; tp->reordering = min_t(u32, tp->packets_out + addend, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); tp->reord_seen++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } @@ -2056,7 +2056,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp) static bool tcp_is_rack(const struct sock *sk) { - return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION; + return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_LOSS_DETECTION; } /* If we detect SACK reneging, forget all SACK information @@ -2100,6 +2101,7 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; + u8 reordering; tcp_timeout_mark_lost(sk); @@ -2120,10 +2122,12 @@ void tcp_enter_loss(struct sock *sk) /* Timeout in disordered state after receiving substantial DUPACKs * suggests that the degree of reordering is over-estimated. */ + reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); if (icsk->icsk_ca_state <= TCP_CA_Disorder && - tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) + tp->sacked_out >= reordering) tp->reordering = min_t(unsigned int, tp->reordering, - net->ipv4.sysctl_tcp_reordering); + reordering); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); @@ -2132,7 +2136,7 @@ void tcp_enter_loss(struct sock *sk) * loss recovery is underway except recurring timeout(s) on * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing */ - tp->frto = net->ipv4.sysctl_tcp_frto && + tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } @@ -2668,12 +2672,15 @@ static void tcp_mtup_probe_success(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + u64 val; - /* FIXME: breaks with very large cwnd */ tp->prior_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = tp->snd_cwnd * - tcp_mss_to_mtu(sk, tp->mss_cache) / - icsk->icsk_mtup.probe_size; + + val = (u64)tp->snd_cwnd * tcp_mss_to_mtu(sk, tp->mss_cache); + do_div(val, icsk->icsk_mtup.probe_size); + WARN_ON_ONCE((u32)val != val); + tp->snd_cwnd = max_t(u32, 1U, val); + tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); @@ -2998,7 +3005,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) { - u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; + u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ; struct tcp_sock *tp = tcp_sk(sk); if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { @@ -3409,7 +3416,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ - if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) + if (tcp_sk(sk)->reordering > + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering)) return flag & FLAG_FORWARD_PROGRESS; return flag & FLAG_DATA_ACKED; @@ -3521,7 +3529,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, if (*last_oow_ack_time) { s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); - if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { + if (0 <= elapsed && + elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } @@ -3569,7 +3578,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; if (now != challenge_timestamp) { - u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit; + u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); u32 half = (ack_limit + 1) >> 1; challenge_timestamp = now; @@ -4001,7 +4010,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && net->ipv4.sysctl_tcp_window_scaling) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > TCP_MAX_WSCALE) { @@ -4017,7 +4026,7 @@ void tcp_parse_options(const struct net *net, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && net->ipv4.sysctl_tcp_timestamps))) { + (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -4025,7 +4034,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && net->ipv4.sysctl_tcp_sack) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) { opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } @@ -4360,7 +4369,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4407,7 +4416,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); - if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; tcp_rcv_spurious_retrans(sk, skb); @@ -5433,7 +5442,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) } if (!tcp_is_sack(tp) || - tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) + tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) goto send_now; if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { @@ -5454,11 +5463,12 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) if (tp->srtt_us && tp->srtt_us < rtt) rtt = tp->srtt_us; - delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, + delay = min_t(unsigned long, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), rtt * (NSEC_PER_USEC >> 3)/20); sock_hold(sk); hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), - sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns), HRTIMER_MODE_REL_PINNED_SOFT); } @@ -5486,7 +5496,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); - if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) + if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg)) ptr--; ptr += ntohl(th->seq); @@ -6682,11 +6692,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; - bool want_cookie = false; struct net *net = sock_net(sk); + bool want_cookie = false; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); #ifdef CONFIG_SYN_COOKIES - if (net->ipv4.sysctl_tcp_syncookies) { + if (syncookies) { msg = "Sending cookies"; want_cookie = true; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); @@ -6694,8 +6707,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) #endif __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - if (!queue->synflood_warned && - net->ipv4.sysctl_tcp_syncookies != 2 && + if (!queue->synflood_warned && syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, sk->sk_num, msg); @@ -6744,7 +6756,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); u16 mss; - if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 && !inet_csk_reqsk_queue_is_full(sk)) return 0; @@ -6778,13 +6790,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, bool want_cookie = false; struct dst_entry *dst; struct flowi fl; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is * evidently real one. */ - if ((net->ipv4.sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { + if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; @@ -6838,10 +6852,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_free; if (!want_cookie && !isn) { + int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); + /* Kill the following clause, if you dislike this way. */ - if (!net->ipv4.sysctl_tcp_syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && + if (!syncookies && + (max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 017cd666387f..0d165ce2d80a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -106,10 +106,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) { + int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse); const struct inet_timewait_sock *tw = inet_twsk(sktw); const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); struct tcp_sock *tp = tcp_sk(sk); - int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse; if (reuse == 2) { /* Still does not detect *everything* that goes through @@ -983,7 +983,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); - tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (inet_sk(sk)->tos & INET_ECN_MASK) : inet_sk(sk)->tos; @@ -1558,7 +1558,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ - if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; if (!dst) { @@ -1980,7 +1980,8 @@ int tcp_v4_rcv(struct sk_buff *skb) struct sock *nsk; sk = req->rsk_listener; - if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) { + if (unlikely(!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb) || + tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) { sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; @@ -2019,6 +2020,7 @@ int tcp_v4_rcv(struct sk_buff *skb) } goto discard_and_relse; } + nf_reset_ct(skb); if (nsk == sk) { reqsk_put(req); tcp_v4_restore_cb(skb); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 6b27c481fe18..f3ca6eea2ca3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk) int m; sk_dst_confirm(sk); - if (net->ipv4.sysctl_tcp_nometrics_save || !dst) + if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst) return; rcu_read_lock(); @@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk) if (tcp_in_initial_slowstart(tp)) { /* Slow start still did not finish. */ - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && (tp->snd_cwnd >> 1) > val) @@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk) } else if (!tcp_in_slow_start(tp) && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); @@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk) tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_ssthresh) >> 1); } - if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && tp->snd_ssthresh > val) @@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk) if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val < tp->reordering && - tp->reordering != net->ipv4.sysctl_tcp_reordering) + tp->reordering != + READ_ONCE(net->ipv4.sysctl_tcp_reordering)) tcp_metric_set(tm, TCP_METRIC_REORDERING, tp->reordering); } @@ -462,7 +463,7 @@ void tcp_init_metrics(struct sock *sk) if (tcp_metric_locked(tm, TCP_METRIC_CWND)) tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); - val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ? + val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ? 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val) { tp->snd_ssthresh = val; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 62f5ef9e6f93..e42312321191 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -180,7 +180,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { + if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c9777617873c..3d29ce3ef9b6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - /* If this is the first data packet sent in response to the - * previous received data, - * and it is a reply for ato after last received packet, - * increase pingpong count. - */ - if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && - (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_inc_pingpong_cnt(sk); - tp->lsndtime = now; + + /* If it is a reply for ato after last received + * packet, enter pingpong mode. + */ + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + inet_csk_enter_pingpong_mode(sk); } /* Account for an ACK we sent. */ @@ -789,18 +786,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -1720,7 +1717,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); + mss_now = max(mss_now, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss)); return mss_now; } @@ -1763,10 +1761,10 @@ void tcp_mtup_init(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss)); icsk->icsk_mtup.probe_size = 0; if (icsk->icsk_mtup.enabled) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; @@ -1898,7 +1896,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); @@ -1986,7 +1984,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) min_tso = ca_ops->min_tso_segs ? ca_ops->min_tso_segs(sk) : - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); return min_t(u32, tso_segs, sk->sk_gso_max_segs); @@ -2277,7 +2275,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) u32 interval; s32 delta; - interval = net->ipv4.sysctl_tcp_probe_interval; + interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval); delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); @@ -2359,7 +2357,7 @@ static int tcp_mtu_probe(struct sock *sk) * probing process by not resetting search range to its orignal. */ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || - interval < net->ipv4.sysctl_tcp_probe_threshold) { + interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) { /* Check whether enough time has elaplased for * another round of probing. */ @@ -2501,7 +2499,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, - sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); limit <<= factor; if (static_branch_unlikely(&tcp_tx_delay_enabled) && @@ -2734,7 +2732,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rcu_access_pointer(tp->fastopen_rsk)) return false; - early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; + early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans); /* Schedule a loss probe in 2*RTT for SACK capable connections * not in loss recovery, that are either limited by cwnd or application. */ @@ -3099,7 +3097,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; @@ -3647,7 +3645,7 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr); - if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG @@ -3683,7 +3681,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sock_net(sk)->ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling), &rcv_wscale, rcv_wnd); @@ -4091,7 +4089,7 @@ void tcp_send_probe0(struct sock *sk) icsk->icsk_probes_out++; if (err <= 0) { - if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) + if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; timeout = tcp_probe0_when(sk, TCP_RTO_MAX); } else { @@ -4115,8 +4113,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL, NULL); if (!res) { - __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); if (unlikely(tcp_passive_fastopen(sk))) tcp_sk(sk)->total_retrans++; trace_tcp_retransmit_synack(sk, req); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 31fc178f42c0..21fc9859d421 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -19,7 +19,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) return 0; if (tp->sacked_out >= tp->reordering && - !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH)) + !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_NO_DUPTHRESH)) return 0; } @@ -190,7 +191,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); - if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_STATIC_REO_WND) || !rs->prior_delivered) return; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4ef08079ccfa..888683f2ff3e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) */ static int tcp_orphan_retries(struct sock *sk, bool alive) { - int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ + int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ if (sk->sk_err_soft && !alive) @@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) int mss; /* Black hole detection */ - if (!net->ipv4.sysctl_tcp_mtu_probing) + if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing)) return; if (!icsk->icsk_mtup.enabled) { @@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); - mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); + mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor)); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -242,14 +242,14 @@ static int tcp_write_timeout(struct sock *sk) retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; expired = icsk->icsk_retransmits >= retry_until; } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); __dst_negative_advice(sk); } - retry_until = net->ipv4.sysctl_tcp_retries2; + retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < TCP_RTO_MAX; @@ -380,7 +380,7 @@ static void tcp_probe_timer(struct sock *sk) msecs_to_jiffies(icsk->icsk_user_timeout)) goto abort; - max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; + max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; @@ -574,7 +574,7 @@ void tcp_retransmit_timer(struct sock *sk) * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && - (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && + (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; @@ -585,7 +585,7 @@ void tcp_retransmit_timer(struct sock *sk) } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX); - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); out:; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index da857876e321..adc02652ef67 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1206,7 +1206,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) faddr, saddr, dport, inet->inet_sport, sk->sk_uid); - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index ea595c8549c7..cfd46222ef91 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -307,4 +307,3 @@ void __init xfrm4_protocol_init(void) { xfrm_input_register_afinfo(&xfrm4_input_afinfo); } -EXPORT_SYMBOL(xfrm4_protocol_init); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d2444dc68f72..d7b8102c6895 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -793,6 +793,7 @@ static void dev_forward_change(struct inet6_dev *idev) { struct net_device *dev; struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); if (!idev) return; @@ -811,14 +812,24 @@ static void dev_forward_change(struct inet6_dev *idev) } } + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa->flags&IFA_F_TENTATIVE) continue; + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + } + read_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); if (idev->cnf.forwarding) addrconf_join_anycast(ifa); else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_FORWARDING, dev->ifindex, &idev->cnf); @@ -1095,10 +1106,6 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, goto out; } - if (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy) - f6i->dst_nopolicy = true; - neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = *cfg->pfx; @@ -3739,7 +3746,8 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN; struct net *net = dev_net(dev); struct inet6_dev *idev; - struct inet6_ifaddr *ifa, *tmp; + struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); bool keep_addr = false; bool was_ready; int state, i; @@ -3831,16 +3839,23 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) write_lock_bh(&idev->lock); } - list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + list_for_each_entry(ifa, &idev->addr_list, if_list) + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + write_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { struct fib6_info *rt = NULL; bool keep; + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); + addrconf_del_dad_work(ifa); keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && !addr_is_local(&ifa->addr); - write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); if (keep) { @@ -3871,15 +3886,14 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) addrconf_leave_solict(ifa->idev, &ifa->addr); } - write_lock_bh(&idev->lock); if (!keep) { + write_lock_bh(&idev->lock); list_del_rcu(&ifa->if_list); + write_unlock_bh(&idev->lock); in6_ifa_put(ifa); } } - write_unlock_bh(&idev->lock); - /* Step 5: Discard anycast and multicast list */ if (unregister) { ipv6_ac_destroy_dev(idev); @@ -4211,7 +4225,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_rs = send_mld && ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits != 0 && - (dev->flags&IFF_LOOPBACK) == 0; + (dev->flags & IFF_LOOPBACK) == 0 && + (dev->type != ARPHRD_TUNNEL); read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c6db41b5063f..3cb5120b1c4a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -225,7 +225,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, inet->mc_list = NULL; inet->rcv_tos = 0; - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -821,7 +821,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sk->sk_uid; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc8ad7ddecda..206f66310a88 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -60,7 +60,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) fl6->flowi6_oif = np->mcast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); } int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index cbab41d557b2..fd1f896115c1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -573,7 +573,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); - security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); np = inet6_sk(sk); @@ -755,7 +755,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; fl6.flowi6_uid = sock_net_uid(net, NULL); - security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); local_bh_disable(); sk = icmpv6_xmit_lock(net); @@ -1008,7 +1008,7 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, fl6->fl6_icmp_type = type; fl6->fl6_icmp_code = 0; fl6->flowi6_oif = oif; - security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); } static void __net_exit icmpv6_sk_exit(struct net *net) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e315526fa244..5a9f4d722f35 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -46,7 +46,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); fl6->flowi6_uid = sk->sk_uid; - security_req_classify_flow(req, flowi6_to_flowi(fl6)); + security_req_classify_flow(req, flowi6_to_flowi_common(fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (IS_ERR(dst)) @@ -95,7 +95,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; fl6->flowi6_uid = sk->sk_uid; - security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); rcu_read_lock(); final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 0a2e7f228391..40203255ed88 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -308,7 +308,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; } -static u32 inet6_sk_port_offset(const struct sock *sk) +static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - u32 port_offset = 0; + u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3f88ba6555ab..9e0890738d93 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -944,7 +944,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, __be16 proto; __u32 mtu; int nhoff; - int thoff; if (!pskb_inet_may_pull(skb)) goto tx_err; @@ -965,10 +964,16 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; - thoff = skb_transport_header(skb) - skb_mac_header(skb); - if (skb->protocol == htons(ETH_P_IPV6) && - (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) - truncate = true; + if (skb->protocol == htons(ETH_P_IPV6)) { + int thoff; + + if (skb_transport_header_was_set(skb)) + thoff = skb_transport_header(skb) - skb_mac_header(skb); + else + thoff = nhoff + sizeof(struct ipv6hdr); + if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) + truncate = true; + } if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen)) goto tx_err; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 4aef6baaa55e..bf95513736c9 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -179,7 +179,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark); - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(oldskb, flowi6_to_flowi_common(&fl6)); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 8b5193efb1f1..3a00d95e964e 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -13,8 +13,8 @@ #include struct nft_dup_ipv6 { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_dev:8; + u8 sreg_addr; + u8 sreg_dev; }; static void nft_dup_ipv6_eval(const struct nft_expr *expr, @@ -38,16 +38,16 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_ADDR] == NULL) return -EINVAL; - priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr)); + err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + sizeof(struct in6_addr)); if (err < 0) return err; - if (tb[NFTA_DUP_SREG_DEV] != NULL) { - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); - } - return 0; + if (tb[NFTA_DUP_SREG_DEV]) + err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + &priv->sreg_dev, sizeof(int)); + + return err; } static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 6caa062f68e7..135e3a060caa 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -22,6 +22,11 @@ #include #include +static void ping_v6_destroy(struct sock *sk) +{ + inet6_destroy_sock(sk); +} + /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -111,7 +116,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); ipcm6_init_sk(&ipc6, np); ipc6.sockc.mark = sk->sk_mark; @@ -166,6 +171,7 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, + .destroy = ping_v6_destroy, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 38349054e361..31eb54e92b3f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -915,7 +915,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_oif = np->mcast_oif; else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7c879481130d..db5492308401 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4436,8 +4436,15 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, } f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); - if (!IS_ERR(f6i)) + if (!IS_ERR(f6i)) { f6i->dst_nocount = true; + + if (!anycast && + (net->ipv6.devconf_all->disable_policy || + idev->cnf.disable_policy)) + f6i->dst_nopolicy = true; + } + return f6i; } @@ -5591,7 +5598,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nexthop_is_blackhole(rt->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (net->ipv4.sysctl_nexthop_compat_mode && + if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) && rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) goto nla_put_failure; diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 85dddfe3a2c6..552bce1fdfb9 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -400,7 +400,6 @@ int __init seg6_hmac_init(void) { return seg6_hmac_init_algo(); } -EXPORT_SYMBOL(seg6_hmac_init); int __net_init seg6_hmac_net_init(struct net *net) { @@ -410,7 +409,6 @@ int __net_init seg6_hmac_net_init(struct net *net) return 0; } -EXPORT_SYMBOL(seg6_hmac_net_init); void seg6_hmac_exit(void) { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 4d4399c5c5ea..40ac23242c37 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -188,6 +188,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, tot_len); return 0; @@ -240,6 +242,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); return 0; @@ -301,7 +305,6 @@ static int seg6_do_srh(struct sk_buff *skb) break; } - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return 0; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index eba23279912d..11f7da4139f6 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -435,7 +435,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); @@ -467,7 +466,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb, if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index bab0e99f6e35..3c92e8cacbba 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -321,9 +321,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) : NULL; - rcu_read_lock(); - - ca = t->prl_count < cmax ? t->prl_count : cmax; + ca = min(t->prl_count, cmax); if (!kp) { /* We don't try hard to allocate much memory for @@ -338,7 +336,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) } } - c = 0; + rcu_read_lock(); for_each_prl_rcu(t->prl) { if (c >= cmax) break; @@ -350,7 +348,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) if (kprl.addr != htonl(INADDR_ANY)) break; } -out: + rcu_read_unlock(); len = sizeof(*kp) * c; @@ -359,7 +357,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) ret = -EFAULT; kfree(kp); - +out: return ret; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 5fa791cf39ca..12ae817aaf2e 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) @@ -234,7 +235,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; fl6.flowi6_uid = sk->sk_uid; - security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index df33145b876c..8d91f36cb11b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -278,7 +278,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); - tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (np->tclass & INET_ECN_MASK) : np->tclass; @@ -975,7 +975,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); - security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST * Underlying function will use this to retrieve the network @@ -1344,7 +1344,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ - if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; /* Clone native IPv6 options from listening socket (if any) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 10760164a80f..7745d8a40209 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1497,7 +1497,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (ipc6.tclass < 0) ipc6.tclass = np->tclass; diff --git a/net/key/af_key.c b/net/key/af_key.c index bd9b5c573b5a..2aa16a171285 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2830,6 +2830,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; + /* Non-zero return value of pfkey_broadcast() does not always signal + * an error and even on an actual error we may still want to process + * the message so rather ignore the return value. + */ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); @@ -2902,7 +2906,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2920,7 +2924,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg))) + if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; for (k = 1; ; k++) { @@ -2931,7 +2935,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg)) + if (aalg_tmpl_set(t, aalg) && aalg->available) sz += sizeof(struct sadb_comb); } } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index e5e5036257b0..d54dbd01d86f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -502,14 +502,15 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; int transhdrlen = 4; /* zero session-id */ - int ulen = len + transhdrlen; + int ulen; int err; /* Rough check on arithmetic overflow, * better check is made in ip6_append_data(). */ - if (len > INT_MAX) + if (len > INT_MAX - transhdrlen) return -EMSGSIZE; + ulen = len + transhdrlen; /* Mirror BSD error message compatibility */ if (msg->msg_flags & MSG_OOB) @@ -606,7 +607,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (ipc6.tclass < 0) ipc6.tclass = np->tclass; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 8f48aff74c7b..5639a71257e4 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1652,12 +1652,9 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata) if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) { if (old_ctx) - err = ieee80211_vif_use_reserved_reassign(sdata); - else - err = ieee80211_vif_use_reserved_assign(sdata); + return ieee80211_vif_use_reserved_reassign(sdata); - if (err) - return err; + return ieee80211_vif_use_reserved_assign(sdata); } /* diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fe8f586886b4..bcc94cc1b620 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1103,6 +1103,9 @@ struct tpt_led_trigger { * a scan complete for an aborted scan. * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being * cancelled. + * @SCAN_BEACON_WAIT: Set whenever we're passive scanning because of radar/no-IR + * and could send a probe request after receiving a beacon. + * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request */ enum { SCAN_SW_SCANNING, @@ -1111,6 +1114,8 @@ enum { SCAN_COMPLETED, SCAN_ABORTED, SCAN_HW_CANCELLED, + SCAN_BEACON_WAIT, + SCAN_BEACON_DONE, }; /** diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1e7614abd947..e991abb45f68 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1387,8 +1387,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, goto dont_reorder; /* not part of a BA session */ - if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && - ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK) goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 6b50cb5e0e3c..887f945bb12d 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -277,6 +277,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) if (likely(!sdata1 && !sdata2)) return; + if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) { + /* + * we were passive scanning because of radar/no-IR, but + * the beacon/proberesp rx gives us an opportunity to upgrade + * to active scan + */ + set_bit(SCAN_BEACON_DONE, &local->scanning); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); + } + if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; @@ -783,6 +793,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, IEEE80211_CHAN_RADAR)) || !req->n_ssids) { next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; + if (req->n_ssids) + set_bit(SCAN_BEACON_WAIT, &local->scanning); } else { ieee80211_scan_state_send_probe(local, &next_delay); next_delay = IEEE80211_CHANNEL_TIME; @@ -994,6 +1006,8 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, !scan_req->n_ssids) { *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; + if (scan_req->n_ssids) + set_bit(SCAN_BEACON_WAIT, &local->scanning); return; } @@ -1086,6 +1100,8 @@ void ieee80211_scan_work(struct work_struct *work) goto out; } + clear_bit(SCAN_BEACON_WAIT, &local->scanning); + /* * as long as no delay is required advance immediately * without scheduling a new work @@ -1096,6 +1112,10 @@ void ieee80211_scan_work(struct work_struct *work) goto out_complete; } + if (test_and_clear_bit(SCAN_BEACON_DONE, &local->scanning) && + local->next_scan_state == SCAN_DECISION) + local->next_scan_state = SCAN_SEND_PROBE; + switch (local->next_scan_state) { case SCAN_DECISION: /* if no more bands/channels left, complete scan */ diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 2fb99325135a..b9404b056087 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -145,8 +145,8 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, bool qos; /* all mesh/ocb stations are required to support WME */ - if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB) + if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_OCB)) qos = true; else if (sta) qos = sta->sta.wme; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 8123c79e2791..d0e91aa7b30e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1421,7 +1421,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (msk->rcvq_space.copied <= msk->rcvq_space.space) goto new_measure; - if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvmem, rcvbuf; u64 rcvwin, grow; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 2fc4ae960769..3d6d49420db8 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -854,7 +854,7 @@ synproxy_send_tcp_ipv6(struct net *net, fl6.fl6_sport = nth->source; fl6.fl6_dport = nth->dest; security_skb_classify_flow((struct sk_buff *)skb, - flowi6_to_flowi(&fl6)); + flowi6_to_flowi_common(&fl6)); err = nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false); if (err) { goto free_nskb; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fdd1da9ecea9..e5622e925ea9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -481,6 +481,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); + INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); nft_trans_flowtable(trans) = flowtable; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -1733,7 +1734,6 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, goto err_hook_dev; } hook->ops.dev = dev; - hook->inactive = false; return hook; @@ -1963,7 +1963,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, chain->flags |= NFT_CHAIN_BASE | flags; basechain->policy = NF_ACCEPT; if (chain->flags & NFT_CHAIN_HW_OFFLOAD && - nft_chain_offload_priority(basechain) < 0) + !nft_chain_offload_support(basechain)) return -EOPNOTSUPP; flow_block_init(&basechain->flow_block); @@ -2679,27 +2679,31 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err = nf_tables_expr_parse(ctx, nla, &info); if (err < 0) - goto err1; + goto err_expr_parse; + + err = -EOPNOTSUPP; + if (!(info.ops->type->flags & NFT_EXPR_STATEFUL)) + goto err_expr_stateful; err = -ENOMEM; expr = kzalloc(info.ops->size, GFP_KERNEL); if (expr == NULL) - goto err2; + goto err_expr_stateful; err = nf_tables_newexpr(ctx, &info, expr); if (err < 0) - goto err3; + goto err_expr_new; return expr; -err3: +err_expr_new: kfree(expr); -err2: +err_expr_stateful: owner = info.ops->type->owner; if (info.ops->type->release_ops) info.ops->type->release_ops(info.ops); module_put(owner); -err1: +err_expr_parse: return ERR_PTR(err); } @@ -4047,6 +4051,9 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, u32 len; int err; + if (desc->field_count >= ARRAY_SIZE(desc->field_len)) + return -E2BIG; + err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr, nft_concat_policy, NULL); if (err < 0) @@ -4056,9 +4063,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, return -EINVAL; len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN])); - - if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT) - return -E2BIG; + if (!len || len > U8_MAX) + return -EINVAL; desc->field_len[desc->field_count++] = len; @@ -4069,7 +4075,8 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { struct nlattr *attr; - int rem, err; + u32 num_regs = 0; + int rem, err, i; nla_for_each_nested(attr, nla, rem) { if (nla_type(attr) != NFTA_LIST_ELEM) @@ -4080,6 +4087,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, return err; } + for (i = 0; i < desc->field_count; i++) + num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); + + if (num_regs > NFT_REG32_COUNT) + return -E2BIG; + return 0; } @@ -4401,6 +4414,12 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, return nft_delset(&ctx, set); } +static int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, + unsigned int len); + static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, @@ -4867,13 +4886,20 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, struct nft_data *data, struct nlattr *attr) { + u32 dtype; int err; err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); if (err < 0) return err; - if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) { + if (set->dtype == NFT_DATA_VERDICT) + dtype = NFT_DATA_VERDICT; + else + dtype = NFT_DATA_VALUE; + + if (dtype != desc->type || + set->dlen != desc->len) { nft_data_release(data, desc->type); return -EINVAL; } @@ -5055,9 +5081,6 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, return expr; err = -EOPNOTSUPP; - if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL)) - goto err_set_elem_expr; - if (expr->ops->type->flags & NFT_EXPR_GC) { if (set->flags & NFT_SET_TIMEOUT) goto err_set_elem_expr; @@ -6684,11 +6707,15 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, if (nla[NFTA_FLOWTABLE_FLAGS]) { flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); - if (flags & ~NFT_FLOWTABLE_MASK) - return -EOPNOTSUPP; + if (flags & ~NFT_FLOWTABLE_MASK) { + err = -EOPNOTSUPP; + goto err_flowtable_update_hook; + } if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^ - (flags & NFT_FLOWTABLE_HW_OFFLOAD)) - return -EOPNOTSUPP; + (flags & NFT_FLOWTABLE_HW_OFFLOAD)) { + err = -EOPNOTSUPP; + goto err_flowtable_update_hook; + } } else { flags = flowtable->data.flags; } @@ -6870,6 +6897,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; + LIST_HEAD(flowtable_del_list); struct nft_hook *this, *hook; struct nft_trans *trans; int err; @@ -6885,7 +6913,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, err = -ENOENT; goto err_flowtable_del_hook; } - hook->inactive = true; + list_move(&hook->list, &flowtable_del_list); } trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE, @@ -6898,6 +6926,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, nft_trans_flowtable(trans) = flowtable; nft_trans_flowtable_update(trans) = true; INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); + list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans)); nft_flowtable_hook_release(&flowtable_hook); list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -6905,13 +6934,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, return 0; err_flowtable_del_hook: - list_for_each_entry(this, &flowtable_hook.list, list) { - hook = nft_hook_list_find(&flowtable->hook_list, this); - if (!hook) - break; - - hook->inactive = false; - } + list_splice(&flowtable_del_list, &flowtable->hook_list); nft_flowtable_hook_release(&flowtable_hook); return err; @@ -7577,6 +7600,9 @@ static void nft_commit_release(struct nft_trans *trans) nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_DELRULE: + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); + nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: @@ -7761,17 +7787,6 @@ void nft_chain_del(struct nft_chain *chain) list_del_rcu(&chain->list); } -static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable, - struct list_head *hook_list) -{ - struct nft_hook *hook, *next; - - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - if (hook->inactive) - list_move(&hook->list, hook_list); - } -} - static void nf_tables_module_autoload_cleanup(struct net *net) { struct nft_module_request *req, *next; @@ -7947,6 +7962,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); + if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_DELRULE: @@ -8035,8 +8053,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - nft_flowtable_hooks_del(nft_trans_flowtable(trans), - &nft_trans_flowtable_hooks(trans)); nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), @@ -8114,7 +8130,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { struct nft_trans *trans, *next; struct nft_trans_elem *te; - struct nft_hook *hook; if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) @@ -8232,8 +8247,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - list_for_each_entry(hook, &nft_trans_flowtable(trans)->hook_list, list) - hook->inactive = false; + list_splice(&nft_trans_flowtable_hooks(trans), + &nft_trans_flowtable(trans)->hook_list); } else { trans->ctx.table->use++; nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); @@ -8512,7 +8527,7 @@ EXPORT_SYMBOL_GPL(nft_dump_register); * Validate that the input register is one of the general purpose * registers and that the length of the load is within the bounds. */ -int nft_validate_register_load(enum nft_registers reg, unsigned int len) +static int nft_validate_register_load(enum nft_registers reg, unsigned int len) { if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; @@ -8523,7 +8538,21 @@ int nft_validate_register_load(enum nft_registers reg, unsigned int len) return 0; } -EXPORT_SYMBOL_GPL(nft_validate_register_load); + +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) +{ + u32 reg; + int err; + + reg = nft_parse_register(attr); + err = nft_validate_register_load(reg, len); + if (err < 0) + return err; + + *sreg = reg; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_register_load); /** * nft_validate_register_store - validate an expressions' register store @@ -8539,10 +8568,11 @@ EXPORT_SYMBOL_GPL(nft_validate_register_load); * A value of NULL for the data means that its runtime gathered * data. */ -int nft_validate_register_store(const struct nft_ctx *ctx, - enum nft_registers reg, - const struct nft_data *data, - enum nft_data_types type, unsigned int len) +static int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, + unsigned int len) { int err; @@ -8574,7 +8604,24 @@ int nft_validate_register_store(const struct nft_ctx *ctx, return 0; } } -EXPORT_SYMBOL_GPL(nft_validate_register_store); + +int nft_parse_register_store(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *dreg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len) +{ + int err; + u32 reg; + + reg = nft_parse_register(attr); + err = nft_validate_register_store(ctx, reg, data, type, len); + if (err < 0) + return err; + + *dreg = reg; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_register_store); static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 839fd09f1bb4..4e99b1731b3f 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -208,7 +208,7 @@ static int nft_setup_cb_call(enum tc_setup_type type, void *type_data, return 0; } -int nft_chain_offload_priority(struct nft_base_chain *basechain) +static int nft_chain_offload_priority(const struct nft_base_chain *basechain) { if (basechain->ops.priority <= 0 || basechain->ops.priority > USHRT_MAX) @@ -217,6 +217,27 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain) return 0; } +bool nft_chain_offload_support(const struct nft_base_chain *basechain) +{ + struct net_device *dev; + struct nft_hook *hook; + + if (nft_chain_offload_priority(basechain) < 0) + return false; + + list_for_each_entry(hook, &basechain->hook_list, list) { + if (hook->ops.pf != NFPROTO_NETDEV || + hook->ops.hooknum != NF_NETDEV_INGRESS) + return false; + + dev = hook->ops.dev; + if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists()) + return false; + } + + return true; +} + static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow, const struct nft_base_chain *basechain, const struct nft_rule *rule, diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 1640da5c5077..72d30922ed29 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -838,11 +838,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) +nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) { struct sk_buff *nskb; if (diff < 0) { + unsigned int min_len = skb_transport_offset(e->skb); + + if (data_len < min_len) + return -EINVAL; + if (pskb_trim(e->skb, data_len)) return -ENOMEM; } else if (diff > 0) { diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index bbd773d74377..47b0dba95054 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -16,8 +16,8 @@ #include struct nft_bitwise { - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; enum nft_bitwise_ops op:8; u8 len; struct nft_data mask; @@ -169,14 +169,14 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, priv->len = len; - priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); - err = nft_validate_register_load(priv->sreg, priv->len); + err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + priv->len); if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); if (err < 0) return err; @@ -315,14 +315,13 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx, struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); int err; - priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); - err = nft_validate_register_load(priv->sreg, sizeof(u32)); + err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + sizeof(u32)); if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 12bed3f7bbc6..9d5947ab8d4e 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -16,8 +16,8 @@ #include struct nft_byteorder { - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; enum nft_byteorder_ops op:8; u8 len; u8 size; @@ -131,20 +131,20 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); if (err < 0) return err; priv->len = len; - err = nft_validate_register_load(priv->sreg, priv->len); + err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg, + priv->len); if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 1d42d06f5b64..b529c0e86546 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -18,7 +18,7 @@ struct nft_cmp_expr { struct nft_data data; - enum nft_registers sreg:8; + u8 sreg; u8 len; enum nft_cmp_ops op:8; }; @@ -87,8 +87,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return err; } - priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); - err = nft_validate_register_load(priv->sreg, desc.len); + err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -211,8 +210,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); - err = nft_validate_register_load(priv->sreg, desc.len); + err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 7fcb73ac2e6e..781118465d46 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -27,8 +27,8 @@ struct nft_ct { enum nft_ct_keys key:8; enum ip_conntrack_dir dir:8; union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 dreg; + u8 sreg; }; }; @@ -499,9 +499,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, } } - priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL, + NFT_DATA_VALUE, len); if (err < 0) return err; @@ -608,8 +607,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, } } - priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); - err = nft_validate_register_load(priv->sreg, len); + err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len); if (err < 0) goto err1; diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 70c457476b87..5b5c607fbf83 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -14,7 +14,7 @@ #include struct nft_dup_netdev { - enum nft_registers sreg_dev:8; + u8 sreg_dev; }; static void nft_dup_netdev_eval(const struct nft_expr *expr, @@ -40,8 +40,8 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_DEV] == NULL) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, + sizeof(int)); } static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 58904bee1a0d..8c45e01fecdd 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -16,8 +16,8 @@ struct nft_dynset { struct nft_set *set; struct nft_set_ext_tmpl tmpl; enum nft_dynset_ops op:8; - enum nft_registers sreg_key:8; - enum nft_registers sreg_data:8; + u8 sreg_key; + u8 sreg_data; bool invert; u64 timeout; struct nft_expr *expr; @@ -154,8 +154,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return err; } - priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); - err = nft_validate_register_load(priv->sreg_key, set->klen); + err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, + set->klen); if (err < 0) return err; @@ -165,8 +165,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; - priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]); - err = nft_validate_register_load(priv->sreg_data, set->dlen); + err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA], + &priv->sreg_data, set->dlen); if (err < 0) return err; } else if (set->flags & NFT_SET_MAP) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index faa0844c01fb..670dd146fb2b 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -19,8 +19,8 @@ struct nft_exthdr { u8 offset; u8 len; u8 op; - enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 dreg; + u8 sreg; u8 flags; }; @@ -353,12 +353,12 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; - priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); priv->flags = flags; priv->op = op; - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, @@ -403,11 +403,11 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; - priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]); priv->flags = flags; priv->op = op; - return nft_validate_register_load(priv->sreg, priv->len); + return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg, + priv->len); } static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 4dfdaeaf09a5..b10ce732b337 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -86,7 +86,6 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); - priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); switch (priv->result) { case NFT_FIB_RESULT_OIF: @@ -106,8 +105,8 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); if (err < 0) return err; diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 3b0dcd170551..7730409f6f09 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -18,7 +18,7 @@ #include struct nft_fwd_netdev { - enum nft_registers sreg_dev:8; + u8 sreg_dev; }; static void nft_fwd_netdev_eval(const struct nft_expr *expr, @@ -50,8 +50,8 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_FWD_SREG_DEV] == NULL) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + sizeof(int)); } static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -83,8 +83,8 @@ static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr) } struct nft_fwd_neigh { - enum nft_registers sreg_dev:8; - enum nft_registers sreg_addr:8; + u8 sreg_dev; + u8 sreg_addr; u8 nfproto; }; @@ -162,8 +162,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, !tb[NFTA_FWD_NFPROTO]) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); - priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]); priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO])); switch (priv->nfproto) { @@ -177,11 +175,13 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nft_validate_register_load(priv->sreg_dev, sizeof(int)); + err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + sizeof(int)); if (err < 0) return err; - return nft_validate_register_load(priv->sreg_addr, addr_len); + return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr, + addr_len); } static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 96371d878e7e..f829f5289e16 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -14,8 +14,8 @@ #include struct nft_jhash { - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; u8 len; bool autogen_seed:1; u32 modulus; @@ -38,7 +38,7 @@ static void nft_jhash_eval(const struct nft_expr *expr, } struct nft_symhash { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; u32 offset; }; @@ -83,9 +83,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx, if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); - priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len); if (err < 0) return err; @@ -94,6 +91,10 @@ static int nft_jhash_init(const struct nft_ctx *ctx, priv->len = len; + err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len); + if (err < 0) + return err; + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; @@ -108,9 +109,8 @@ static int nft_jhash_init(const struct nft_ctx *ctx, get_random_bytes(&priv->seed, sizeof(priv->seed)); } - return nft_validate_register_load(priv->sreg, len) && - nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_symhash_init(const struct nft_ctx *ctx, @@ -126,8 +126,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx, if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; @@ -135,8 +133,9 @@ static int nft_symhash_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + sizeof(u32)); } static int nft_jhash_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 5c9d88560a47..d0f67d325bdf 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -48,9 +48,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx, priv->dlen = desc.len; - priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, &priv->data, - desc.type, desc.len); + err = nft_parse_register_store(ctx, tb[NFTA_IMMEDIATE_DREG], + &priv->dreg, &priv->data, desc.type, + desc.len); if (err < 0) goto err1; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index f1363b8aabba..b0f558b4fea5 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -17,8 +17,8 @@ struct nft_lookup { struct nft_set *set; - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; bool invert; struct nft_set_binding binding; }; @@ -76,8 +76,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); - priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); - err = nft_validate_register_load(priv->sreg, set->klen); + err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg, + set->klen); if (err < 0) return err; @@ -100,9 +100,9 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_MAP)) return -EINVAL; - priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - set->dtype, set->dlen); + err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], + &priv->dreg, NULL, set->dtype, + set->dlen); if (err < 0) return err; } else if (set->flags & NFT_SET_MAP) diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 71390b727040..9953e8053753 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -15,8 +15,8 @@ struct nft_masq { u32 flags; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_proto_min; + u8 sreg_proto_max; }; static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { @@ -54,19 +54,15 @@ static int nft_masq_init(const struct nft_ctx *ctx, } if (tb[NFTA_MASQ_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_MASQ_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index bf4b3ad5314c..44d9b38e5f90 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -32,8 +33,6 @@ #define NFT_META_SECS_PER_DAY 86400 #define NFT_META_DAYS_PER_WEEK 7 -static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); - static u8 nft_meta_weekday(void) { time64_t secs = ktime_get_real_seconds(); @@ -267,13 +266,6 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest, return true; } -static noinline u32 nft_prandom_u32(void) -{ - struct rnd_state *state = this_cpu_ptr(&nft_prandom_state); - - return prandom_u32_state(state); -} - #ifdef CONFIG_IP_ROUTE_CLASSID static noinline bool nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest) @@ -385,7 +377,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; #endif case NFT_META_PRANDOM: - *dest = nft_prandom_u32(); + *dest = get_random_u32(); break; #ifdef CONFIG_XFRM case NFT_META_SECPATH: @@ -514,7 +506,6 @@ int nft_meta_get_init(const struct nft_ctx *ctx, len = IFNAMSIZ; break; case NFT_META_PRANDOM: - prandom_init_once(&nft_prandom_state); len = sizeof(u32); break; #ifdef CONFIG_XFRM @@ -535,9 +526,8 @@ int nft_meta_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } EXPORT_SYMBOL_GPL(nft_meta_get_init); @@ -661,8 +651,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->sreg = nft_parse_register(tb[NFTA_META_SREG]); - err = nft_validate_register_load(priv->sreg, len); + err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); if (err < 0) return err; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index ea53fd999f46..db8f9116eeb4 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -21,10 +21,10 @@ #include struct nft_nat { - enum nft_registers sreg_addr_min:8; - enum nft_registers sreg_addr_max:8; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_addr_min; + u8 sreg_addr_max; + u8 sreg_proto_min; + u8 sreg_proto_max; enum nf_nat_manip_type type:8; u8 family; u16 flags; @@ -208,18 +208,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - priv->sreg_addr_min = - nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]); - err = nft_validate_register_load(priv->sreg_addr_min, alen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN], + &priv->sreg_addr_min, alen); if (err < 0) return err; if (tb[NFTA_NAT_REG_ADDR_MAX]) { - priv->sreg_addr_max = - nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]); - - err = nft_validate_register_load(priv->sreg_addr_max, - alen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX], + &priv->sreg_addr_max, + alen); if (err < 0) return err; } else { @@ -231,19 +228,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, plen = sizeof_field(struct nf_nat_range, min_addr.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_NAT_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { @@ -341,7 +334,8 @@ static void nft_nat_inet_eval(const struct nft_expr *expr, { const struct nft_nat *priv = nft_expr_priv(expr); - if (priv->family == nft_pf(pkt)) + if (priv->family == nft_pf(pkt) || + priv->family == NFPROTO_INET) nft_nat_eval(expr, regs, pkt); } diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index f1fc824f9737..4e43214e88de 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -9,14 +9,13 @@ #include #include #include +#include #include #include #include -static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); - struct nft_ng_inc { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; atomic_t counter; u32 offset; @@ -66,11 +65,10 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); atomic_set(&priv->counter, priv->modulus - 1); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, @@ -100,17 +98,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) } struct nft_ng_random { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; u32 offset; }; -static u32 nft_ng_random_gen(struct nft_ng_random *priv) +static u32 nft_ng_random_gen(const struct nft_ng_random *priv) { - struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); - - return reciprocal_scale(prandom_u32_state(state), priv->modulus) + - priv->offset; + return reciprocal_scale(get_random_u32(), priv->modulus) + priv->offset; } static void nft_ng_random_eval(const struct nft_expr *expr, @@ -138,12 +133,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - prandom_init_once(&nft_numgen_prandom_state); - - priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); - - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 5f9207a9f485..bc104d36d3bb 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -95,7 +95,7 @@ static const struct nft_expr_ops nft_objref_ops = { struct nft_objref_map { struct nft_set *set; - enum nft_registers sreg:8; + u8 sreg; struct nft_set_binding binding; }; @@ -137,8 +137,8 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_OBJECT)) return -EINVAL; - priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]); - err = nft_validate_register_load(priv->sreg, set->klen); + err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg, + set->klen); if (err < 0) return err; diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 2c957629ea66..d82677e83400 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -6,7 +6,7 @@ #include struct nft_osf { - enum nft_registers dreg:8; + u8 dreg; u8 ttl; u32 flags; }; @@ -83,9 +83,9 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->flags = flags; } - priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); + err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, + NFT_OSF_MAXGENRELEN); if (err < 0) return err; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 6a8495bd08bb..01878c16418c 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -144,10 +144,10 @@ static int nft_payload_init(const struct nft_ctx *ctx, priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -664,7 +664,6 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); if (tb[NFTA_PAYLOAD_CSUM_TYPE]) priv->csum_type = @@ -697,7 +696,8 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - return nft_validate_register_load(priv->sreg, priv->len); + return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg, + priv->len); } static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 23265d757acb..9ba1de51ac07 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -19,10 +19,10 @@ static u32 jhash_initval __read_mostly; struct nft_queue { - enum nft_registers sreg_qnum:8; - u16 queuenum; - u16 queues_total; - u16 flags; + u8 sreg_qnum; + u16 queuenum; + u16 queues_total; + u16 flags; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -111,8 +111,8 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx, struct nft_queue *priv = nft_expr_priv(expr); int err; - priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]); - err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32)); + err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM], + &priv->sreg_qnum, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 89efcc5a533d..e4a1c44d7f51 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -15,7 +15,7 @@ struct nft_range_expr { struct nft_data data_from; struct nft_data data_to; - enum nft_registers sreg:8; + u8 sreg; u8 len; enum nft_range_ops op:8; }; @@ -86,8 +86,8 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr goto err2; } - priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]); - err = nft_validate_register_load(priv->sreg, desc_from.len); + err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg, + desc_from.len); if (err < 0) goto err2; diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 2056051c0af0..ba09890dddb5 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -14,8 +14,8 @@ #include struct nft_redir { - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_proto_min; + u8 sreg_proto_max; u16 flags; }; @@ -50,19 +50,15 @@ static int nft_redir_init(const struct nft_ctx *ctx, plen = sizeof_field(struct nf_nat_range, min_addr.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 7cfcb0e2f7ee..bcd01a63e38f 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -15,7 +15,7 @@ struct nft_rt { enum nft_rt_keys key:8; - enum nft_registers dreg:8; + u8 dreg; }; static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst) @@ -141,9 +141,8 @@ static int nft_rt_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_RT_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_rt_get_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 858c8d4d659a..a5cfb321ae23 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -142,6 +142,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); he = prev; } @@ -151,6 +152,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, err2: nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); err1: return false; } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index f67c4436c5d3..949da87dbb06 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2120,6 +2120,32 @@ static int nft_pipapo_init(const struct nft_set *set, return err; } +/** + * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array + * @set: nftables API set representation + * @m: matching data pointing to key mapping array + */ +static void nft_set_pipapo_match_destroy(const struct nft_set *set, + struct nft_pipapo_match *m) +{ + struct nft_pipapo_field *f; + int i, r; + + for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) + ; + + for (r = 0; r < f->rules; r++) { + struct nft_pipapo_elem *e; + + if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e) + continue; + + e = f->mt[r].e; + + nft_set_elem_destroy(set, e, true); + } +} + /** * nft_pipapo_destroy() - Free private data for set and all committed elements * @set: nftables API set representation @@ -2128,26 +2154,13 @@ static void nft_pipapo_destroy(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; - struct nft_pipapo_field *f; - int i, r, cpu; + int cpu; m = rcu_dereference_protected(priv->match, true); if (m) { rcu_barrier(); - for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) - ; - - for (r = 0; r < f->rules; r++) { - struct nft_pipapo_elem *e; - - if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e) - continue; - - e = f->mt[r].e; - - nft_set_elem_destroy(set, e, true); - } + nft_set_pipapo_match_destroy(set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); @@ -2161,6 +2174,11 @@ static void nft_pipapo_destroy(const struct nft_set *set) } if (priv->clone) { + m = priv->clone; + + if (priv->dirty) + nft_set_pipapo_match_destroy(set, m); + #ifdef NFT_PIPAPO_ALIGN free_percpu(priv->clone->scratch_aligned); #endif diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 8a0125e966c8..f6d517185d9c 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -10,7 +10,7 @@ struct nft_socket { enum nft_socket_keys key:8; union { - enum nft_registers dreg:8; + u8 dreg; }; }; @@ -146,9 +146,8 @@ static int nft_socket_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_socket_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 242222dc52c3..37c728bdad41 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -13,9 +13,9 @@ #endif struct nft_tproxy { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_port:8; - u8 family; + u8 sreg_addr; + u8 sreg_port; + u8 family; }; static void nft_tproxy_eval_v4(const struct nft_expr *expr, @@ -254,15 +254,15 @@ static int nft_tproxy_init(const struct nft_ctx *ctx, } if (tb[NFTA_TPROXY_REG_ADDR]) { - priv->sreg_addr = nft_parse_register(tb[NFTA_TPROXY_REG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, alen); + err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR], + &priv->sreg_addr, alen); if (err < 0) return err; } if (tb[NFTA_TPROXY_REG_PORT]) { - priv->sreg_port = nft_parse_register(tb[NFTA_TPROXY_REG_PORT]); - err = nft_validate_register_load(priv->sreg_port, sizeof(u16)); + err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT], + &priv->sreg_port, sizeof(u16)); if (err < 0) return err; } diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index d3eb953d0333..3b27926d5382 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -15,7 +15,7 @@ struct nft_tunnel { enum nft_tunnel_keys key:8; - enum nft_registers dreg:8; + u8 dreg; enum nft_tunnel_mode mode:8; }; @@ -93,8 +93,6 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]); - if (tb[NFTA_TUNNEL_MODE]) { priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE])); if (priv->mode > NFT_TUNNEL_MODE_MAX) @@ -103,8 +101,8 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, priv->mode = NFT_TUNNEL_MODE_NONE; } - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_tunnel_get_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 06d5cabf1d7c..cbbbc4ecad3a 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -24,7 +24,7 @@ static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = { struct nft_xfrm { enum nft_xfrm_keys key:8; - enum nft_registers dreg:8; + u8 dreg; u8 dir; u8 spnum; }; @@ -86,9 +86,8 @@ static int nft_xfrm_get_init(const struct nft_ctx *ctx, priv->spnum = spnum; - priv->dreg = nft_parse_register(tb[NFTA_XFRM_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_XFRM_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } /* Return true if key asks for daddr/saddr and current diff --git a/net/nfc/core.c b/net/nfc/core.c index 3b2983813ff1..2ef56366bd5f 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1158,6 +1158,7 @@ void nfc_unregister_device(struct nfc_dev *dev) if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); + dev->rfkill = NULL; } dev->shutting_down = true; device_unlock(&dev->dev); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index ce3382be937f..b002e18f38c8 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), - GFP_KERNEL); + GFP_ATOMIC); if (skb_frag == NULL) { rc = -ENOMEM; goto free_exit; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 04e55ccb3383..4fe336ff2bfa 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; @@ -186,7 +186,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + - NCI_DATA_HDR_SIZE, GFP_KERNEL); + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 6d8d70021666..80fee9d118ee 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -372,6 +372,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, update_ip_l4_checksum(skb, nh, *addr, new_addr); csum_replace4(&nh->check, *addr, new_addr); skb_clear_hash(skb); + ovs_ct_clear(skb, NULL); *addr = new_addr; } @@ -419,6 +420,7 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, update_ipv6_checksum(skb, l4_proto, addr, new_addr); skb_clear_hash(skb); + ovs_ct_clear(skb, NULL); memcpy(addr, new_addr, sizeof(__be32[4])); } @@ -659,6 +661,7 @@ static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key, static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) { + ovs_ct_clear(skb, NULL); inet_proto_csum_replace2(check, skb, *port, new_port, false); *port = new_port; } @@ -698,6 +701,7 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, uh->dest = dst; flow_key->tp.src = src; flow_key->tp.dst = dst; + ovs_ct_clear(skb, NULL); } skb_clear_hash(skb); @@ -760,6 +764,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, sh->checksum = old_csum ^ old_correct_csum ^ new_csum; skb_clear_hash(skb); + ovs_ct_clear(skb, NULL); + flow_key->tp.src = sh->source; flow_key->tp.dst = sh->dest; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 7ff98d39ec94..41f248895a87 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1324,7 +1324,8 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key) if (skb_nfct(skb)) { nf_conntrack_put(skb_nfct(skb)); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); - ovs_ct_fill_key(skb, key); + if (key) + ovs_ct_fill_key(skb, key); } return 0; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b03d142ec82e..c9ba61413c98 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -265,7 +265,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) if (flags & IP6_FH_F_FRAG) { if (frag_off) { key->ip.frag = OVS_FRAG_TYPE_LATER; - key->ip.proto = nexthdr; + key->ip.proto = NEXTHDR_FRAGMENT; return 0; } key->ip.frag = OVS_FRAG_TYPE_FIRST; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 6e35703ff353..95b198f84a3a 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -227,8 +227,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) { struct rose_neigh *s; - rose_stop_ftimer(rose_neigh); - rose_stop_t0timer(rose_neigh); + del_timer_sync(&rose_neigh->ftimer); + del_timer_sync(&rose_neigh->t0timer); skb_queue_purge(&rose_neigh->queue); diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index b3138fc2e552..f06ddbed3fed 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *); void rose_start_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); sk->sk_timer.function = rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires); } void rose_start_t1timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t2timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t3timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_hbtimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_idletimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->idletimer); + sk_stop_timer(sk, &rose->idletimer); if (rose->idle > 0) { rose->idletimer.function = rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; - add_timer(&rose->idletimer); + sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires); } } void rose_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } void rose_stop_timer(struct sock *sk) { - del_timer(&rose_sk(sk)->timer); + sk_stop_timer(sk, &rose_sk(sk)->timer); } void rose_stop_idletimer(struct sock *sk) { - del_timer(&rose_sk(sk)->idletimer); + sk_stop_timer(sk, &rose_sk(sk)->idletimer); } static void rose_heartbeat_expiry(struct timer_list *t) @@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { bh_unlock_sock(sk); rose_destroy_socket(sk); + sock_put(sk); return; } break; @@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) rose_start_heartbeat(sk); bh_unlock_sock(sk); + sock_put(sk); } static void rose_timer_expiry(struct timer_list *t) @@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t) break; } bh_unlock_sock(sk); + sock_put(sk); } static void rose_idletimer_expiry(struct timer_list *t) @@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t) sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); + sock_put(sk); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 3bad9f5f9102..ccb65412b670 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -659,13 +659,12 @@ struct rxrpc_call { spinlock_t input_lock; /* Lock for packet input to this call */ - /* receive-phase ACK management */ + /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - rxrpc_serial_t ackr_first_seq; /* first sequence number received */ - rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ - rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ - rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ + rxrpc_seq_t ackr_highest_seq; /* Higest sequence number received */ + atomic_t ackr_nr_unacked; /* Number of unacked packets */ + atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */ /* RTT management */ rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */ @@ -675,8 +674,10 @@ struct rxrpc_call { #define RXRPC_CALL_RTT_AVAIL_MASK 0xf #define RXRPC_CALL_RTT_PEND_SHIFT 8 - /* transmission-phase ACK management */ + /* Transmission-phase ACK management (ACKs we've received). */ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ + rxrpc_seq_t acks_first_seq; /* first sequence number received */ + rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 22e05de5d1ca..f8ecad2b730e 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -377,9 +377,9 @@ void rxrpc_process_call(struct work_struct *work) if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && (int)call->conn->hi_serial - (int)call->rx_serial > 0) { trace_rxrpc_call_reset(call); - rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET); + rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET); } else { - rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); } set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; @@ -406,7 +406,8 @@ void rxrpc_process_call(struct work_struct *work) goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) && + call->state != RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_resend(call, now); goto recheck_state; } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3bcbe0665f91..3ef05a0e90ad 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -184,7 +184,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; default: - chan->last_abort = RX_USER_ABORT; + chan->last_abort = RX_CALL_DEAD; chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index dc201363f2c4..1145cb14d86f 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -412,8 +412,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); enum rxrpc_call_state state; - unsigned int j, nr_subpackets; - rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; + unsigned int j, nr_subpackets, nr_unacked = 0; + rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial; rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack; bool immediate_ack = false, jumbo_bad = false; u8 ack = 0; @@ -453,7 +453,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) !rxrpc_receiving_reply(call)) goto unlock; - call->ackr_prev_seq = seq0; hard_ack = READ_ONCE(call->rx_hard_ack); nr_subpackets = sp->nr_subpackets; @@ -534,6 +533,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) ack_serial = serial; } + if (after(seq0, call->ackr_highest_seq)) + call->ackr_highest_seq = seq0; + /* Queue the packet. We use a couple of memory barriers here as need * to make sure that rx_top is perceived to be set after the buffer * pointer and that the buffer pointer is set after the annotation and @@ -567,6 +569,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) sp = NULL; } + nr_unacked++; + if (last) { set_bit(RXRPC_CALL_RX_LAST, &call->flags); if (!ack) { @@ -586,9 +590,14 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) } call->rx_expect_next = seq + 1; } + if (!ack) + ack_serial = serial; } ack: + if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack) + ack = RXRPC_ACK_IDLE; + if (ack) rxrpc_propose_ACK(call, ack, ack_serial, immediate_ack, true, @@ -812,7 +821,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, static bool rxrpc_is_ack_valid(struct rxrpc_call *call, rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) { - rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq); + rxrpc_seq_t base = READ_ONCE(call->acks_first_seq); if (after(first_pkt, base)) return true; /* The window advanced */ @@ -820,7 +829,7 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, if (before(first_pkt, base)) return false; /* firstPacket regressed */ - if (after_eq(prev_pkt, call->ackr_prev_seq)) + if (after_eq(prev_pkt, call->acks_prev_seq)) return true; /* previousPacket hasn't regressed. */ /* Some rx implementations put a serial number in previousPacket. */ @@ -906,8 +915,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) /* Discard any out-of-order or duplicate ACKs (outside lock). */ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, - first_soft_ack, call->ackr_first_seq, - prev_pkt, call->ackr_prev_seq); + first_soft_ack, call->acks_first_seq, + prev_pkt, call->acks_prev_seq); return; } @@ -922,14 +931,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) /* Discard any out-of-order or duplicate ACKs (inside lock). */ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, - first_soft_ack, call->ackr_first_seq, - prev_pkt, call->ackr_prev_seq); + first_soft_ack, call->acks_first_seq, + prev_pkt, call->acks_prev_seq); goto out; } call->acks_latest_ts = skb->tstamp; - call->ackr_first_seq = first_soft_ack; - call->ackr_prev_seq = prev_pkt; + call->acks_first_seq = first_soft_ack; + call->acks_prev_seq = prev_pkt; /* Parse rwind and mtu sizes if provided. */ if (buf.info.rxMTU) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a45c83f22236..9683617db704 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -74,11 +74,18 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, u8 reason) { rxrpc_serial_t serial; + unsigned int tmp; rxrpc_seq_t hard_ack, top, seq; int ix; u32 mtu, jmax; u8 *ackp = pkt->acks; + tmp = atomic_xchg(&call->ackr_nr_unacked, 0); + tmp |= atomic_xchg(&call->ackr_nr_consumed, 0); + if (!tmp && (reason == RXRPC_ACK_DELAY || + reason == RXRPC_ACK_IDLE)) + return 0; + /* Barrier against rxrpc_input_data(). */ serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); @@ -89,7 +96,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, pkt->ack.bufferSpace = htons(8); pkt->ack.maxSkew = htons(0); pkt->ack.firstPacket = htonl(hard_ack + 1); - pkt->ack.previousPacket = htonl(call->ackr_prev_seq); + pkt->ack.previousPacket = htonl(call->ackr_highest_seq); pkt->ack.serial = htonl(serial); pkt->ack.reason = reason; pkt->ack.nAcks = top - hard_ack; @@ -223,6 +230,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); spin_unlock_bh(&call->lock); + if (n == 0) { + kfree(pkt); + return 0; + } iov[0].iov_base = pkt; iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; @@ -259,13 +270,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, ntohl(pkt->ack.serial), false, true, rxrpc_propose_ack_retry_tx); - } else { - spin_lock_bh(&call->lock); - if (after(hard_ack, call->ackr_consumed)) - call->ackr_consumed = hard_ack; - if (after(top, call->ackr_seen)) - call->ackr_seen = top; - spin_unlock_bh(&call->lock); } rxrpc_set_keepalive(call); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 2c842851d72e..787826773937 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -260,11 +260,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) rxrpc_end_rx_phase(call, serial); } else { /* Check to see if there's an ACK that needs sending. */ - if (after_eq(hard_ack, call->ackr_consumed + 2) || - after_eq(top, call->ackr_seen + 2) || - (hard_ack == top && after(hard_ack, call->ackr_consumed))) - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, - true, true, + if (atomic_inc_return(&call->ackr_nr_consumed) > 2) + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, + true, false, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) rxrpc_send_ack_packet(call, false, NULL); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index d27140c836cc..aa23ba4e2566 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -461,6 +461,12 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, success: ret = copied; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { + read_lock_bh(&call->state_lock); + if (call->error < 0) + ret = call->error; + read_unlock_bh(&call->state_lock); + } out: call->tx_pending = skb; _leave(" = %d", ret); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 540351d6a5f4..555e0910786b 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -12,7 +12,7 @@ static struct ctl_table_header *rxrpc_sysctl_reg_table; static const unsigned int four = 4; -static const unsigned int thirtytwo = 32; +static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1; static const unsigned int n_65535 = 65535; static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; static const unsigned long one_jiffy = 1; @@ -89,7 +89,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)&four, - .extra2 = (void *)&thirtytwo, + .extra2 = (void *)&max_backlog, }, { .procname = "rx_window_size", diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 7b29aa1a3ce9..4ab9c2a6f650 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -302,7 +302,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p) } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; @@ -318,20 +319,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; + ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); - if (ret == ACT_P_DELETED) { + if (ret == ACT_P_DELETED) module_put(ops->owner); - n_i++; - } else if (ret < 0) { - mutex_unlock(&idrinfo->lock); - goto nla_put_failure; - } + else if (ret < 0) + break; + n_i++; } mutex_unlock(&idrinfo->lock); + if (ret < 0) { + if (n_i) + NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); + else + goto nla_put_failure; + } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) @@ -352,7 +358,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(idrinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 90510298b32a..0d5463ddfd62 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -232,6 +232,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, for (i = 0; i < p->tcfp_nkeys; ++i) { u32 cur = p->tcfp_keys[i].off; + /* sanitize the shift value for any later use */ + p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1, + p->tcfp_keys[i].shift); + /* The AT option can read a single byte, we can bound the actual * value with uchar max. */ diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8d8452b1cdd4..380733588959 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -213,6 +213,20 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, return err; } +static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit) +{ + u32 len; + + if (skb_is_gso(skb)) + return skb_gso_validate_mac_len(skb, limit); + + len = qdisc_pkt_len(skb); + if (skb_at_tc_ingress(skb)) + len += skb->mac_len; + + return len <= limit; +} + static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -235,7 +249,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, goto inc_overlimits; } - if (qdisc_pkt_len(skb) <= p->tcfp_mtu) { + if (tcf_police_mtu_check(skb, p->tcfp_mtu)) { if (!p->rate_present) { ret = p->tcfp_result; goto end; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0c345e43a09a..adc5407fd5d5 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1146,9 +1146,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_rate rate; struct tc_netem_slot slot; - qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), + qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency), UINT_MAX); - qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter), + qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter), UINT_MAX); qopt.limit = q->limit; qopt.loss = q->loss; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fdb69d46276d..2d4ec6187755 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -226,9 +226,8 @@ static struct sctp_association *sctp_association_init( if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, - 0, gfp)) - goto fail_init; + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) + goto stream_free; /* Initialize default path MTU. */ asoc->pathmtu = sp->pathmtu; diff --git a/net/sctp/input.c b/net/sctp/input.c index 34494a0b28bd..8f3aab6a4458 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -92,6 +92,7 @@ int sctp_rcv(struct sk_buff *skb) struct sctp_chunk *chunk; union sctp_addr src; union sctp_addr dest; + int bound_dev_if; int family; struct sctp_af *af; struct net *net = dev_net(skb->dev); @@ -169,7 +170,8 @@ int sctp_rcv(struct sk_buff *skb) * If a frame arrives on an interface and the receiving socket is * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ - if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) { if (transport) { sctp_transport_put(transport); asoc = NULL; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 940f1e257a90..6e4ca837e91d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && - !net->ipv4.sysctl_ip_nonlocal_bind) + !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind)) return 0; if (ipv6_only_sock(sctp_opt2sk(sp))) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 6dc95dcc0ff4..ef9fceadef8d 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, ret = sctp_stream_alloc_out(stream, outcnt, gfp); if (ret) - goto out_err; + return ret; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; @@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, handle_in: sctp_stream_interleave_init(stream); if (!incnt) - goto out; + return 0; - ret = sctp_stream_alloc_in(stream, incnt, gfp); - if (ret) - goto in_err; - - goto out; - -in_err: - sched->free(stream); - genradix_free(&stream->in); -out_err: - genradix_free(&stream->out); - stream->outcnt = 0; -out: - return ret; + return sctp_stream_alloc_in(stream, incnt, gfp); } int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 99e5f69fbb74..a2e1d34f52c5 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, if (!SCTP_SO(&asoc->stream, i)->ext) continue; - ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); + ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); if (ret) goto err; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 35db3260e8d5..5d7710dd9514 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1118,9 +1118,9 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, if (rc && rc != -EINPROGRESS) goto out; - sock_hold(&smc->sk); /* sock put in passive closing */ if (smc->use_fallback) goto out; + sock_hold(&smc->sk); /* sock put in passive closing */ if (flags & O_NONBLOCK) { if (queue_work(smc_hs_wq, &smc->connect_work)) smc->connect_nonblock = 1; diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 0c490cdde6a4..94503f36b9a6 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -72,7 +72,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, /* abnormal termination */ if (!rc) smc_wr_tx_put_slot(link, - (struct smc_wr_tx_pend_priv *)pend); + (struct smc_wr_tx_pend_priv *)(*pend)); rc = -EPIPE; } return rc; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index ee1f0fdba085..0ef15f8fba90 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1787,7 +1787,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_flow_waiter); init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); - lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; + lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } /* called after lgr was removed from lgr_list */ diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 71e03b930b70..d84bb5037bb5 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -752,7 +752,11 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, */ xdr->p = (void *)p + frag2bytes; space_left = xdr->buf->buflen - xdr->buf->len; - xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE); + if (space_left - frag1bytes >= PAGE_SIZE) + xdr->end = (void *)p + PAGE_SIZE; + else + xdr->end = (void *)p + space_left - frag1bytes; + xdr->buf->page_len += frag2bytes; xdr->buf->len += nbytes; return p; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index ca267a855a12..b8174c77dfe1 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1137,6 +1137,7 @@ static bool rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) #if defined(CONFIG_SUNRPC_BACKCHANNEL) { + struct rpc_xprt *xprt = &r_xprt->rx_xprt; struct xdr_stream *xdr = &rep->rr_stream; __be32 *p; @@ -1160,6 +1161,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) if (*p != cpu_to_be32(RPC_CALL)) return false; + /* No bc service. */ + if (xprt->bc_serv == NULL) + return false; + /* Now that we are sure this is a backchannel call, * advance to the RPC header. */ diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6911f1cab206..72c31ef985eb 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -249,9 +249,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, u32 i; if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; NL_SET_ERR_MSG(extack, "Illegal name"); - goto rejected; + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { diff --git a/net/tipc/core.c b/net/tipc/core.c index 40c03085c0ea..7724499f516e 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -60,7 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->trial_addr = 0; tn->addr_trial_end = 0; tn->capabilities = TIPC_NODE_CAPABILITIES; - INIT_WORK(&tn->final_work.work, tipc_net_finalize_work); + INIT_WORK(&tn->work, tipc_net_finalize_work); memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; @@ -111,10 +111,9 @@ static void __net_exit tipc_exit_net(struct net *net) struct tipc_net *tn = tipc_net(net); tipc_detach_loopback(net); - /* Make sure the tipc_net_finalize_work() finished */ - cancel_work_sync(&tn->final_work.work); tipc_net_stop(net); - + /* Make sure the tipc_net_finalize_work() finished */ + cancel_work_sync(&tn->work); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); diff --git a/net/tipc/core.h b/net/tipc/core.h index 992924a849be..73a26b0b9ca1 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -90,12 +90,6 @@ extern unsigned int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; -struct tipc_net_work { - struct work_struct work; - struct net *net; - u32 addr; -}; - struct tipc_net { u8 node_id[NODE_ID_LEN]; u32 node_addr; @@ -150,7 +144,7 @@ struct tipc_net { struct tipc_crypto *crypto_tx; #endif /* Work item for net finalize */ - struct tipc_net_work final_work; + struct work_struct work; /* The numbers of work queues in schedule */ atomic_t wq_count; }; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index d4ecacddb40c..14bc20604051 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -167,7 +167,7 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, /* Apply trial address if we just left trial period */ if (!trial && !self) { - tipc_sched_net_finalize(net, tn->trial_addr); + schedule_work(&tn->work); msg_set_prevnode(buf_msg(d->skb), tn->trial_addr); msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); } @@ -307,7 +307,7 @@ static void tipc_disc_timeout(struct timer_list *t) if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) { mod_timer(&d->timer, jiffies + TIPC_DISC_INIT); spin_unlock_bh(&d->lock); - tipc_sched_net_finalize(net, tn->trial_addr); + schedule_work(&tn->work); return; } diff --git a/net/tipc/link.c b/net/tipc/link.c index 7a353ff62844..064fdb8e50e1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -344,6 +344,11 @@ char tipc_link_plane(struct tipc_link *l) return l->net_plane; } +struct net *tipc_link_net(struct tipc_link *l) +{ + return l->net; +} + void tipc_link_update_caps(struct tipc_link *l, u16 capabilities) { l->peer_caps = capabilities; diff --git a/net/tipc/link.h b/net/tipc/link.h index fc07232c9a12..a16f401fdabd 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -156,4 +156,5 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); bool tipc_link_too_silent(struct tipc_link *l); +struct net *tipc_link_net(struct tipc_link *l); #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index 0bb2323201da..671cb4f9d563 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -41,6 +41,7 @@ #include "socket.h" #include "node.h" #include "bcast.h" +#include "link.h" #include "netlink.h" #include "monitor.h" @@ -138,19 +139,9 @@ static void tipc_net_finalize(struct net *net, u32 addr) void tipc_net_finalize_work(struct work_struct *work) { - struct tipc_net_work *fwork; + struct tipc_net *tn = container_of(work, struct tipc_net, work); - fwork = container_of(work, struct tipc_net_work, work); - tipc_net_finalize(fwork->net, fwork->addr); -} - -void tipc_sched_net_finalize(struct net *net, u32 addr) -{ - struct tipc_net *tn = tipc_net(net); - - tn->final_work.net = net; - tn->final_work.addr = addr; - schedule_work(&tn->final_work.work); + tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); } void tipc_net_stop(struct net *net) diff --git a/net/tipc/node.c b/net/tipc/node.c index e4452d55851f..60059827563a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -456,8 +456,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, bool preliminary) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l, *snd_l = tipc_bc_sndlink(net); struct tipc_node *n, *temp_node; - struct tipc_link *l; unsigned long intv; int bearer_id; int i; @@ -472,6 +472,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, goto exit; /* A preliminary node becomes "real" now, refresh its data */ tipc_node_write_lock(n); + if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link refresh failed, no memory\n"); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); + n = NULL; + goto exit; + } n->preliminary = false; n->addr = addr; hlist_del_rcu(&n->hash); @@ -551,7 +561,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; - n->bc_entry.link = NULL; + if (!preliminary && + !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + kfree(n); + n = NULL; + goto exit; + } tipc_node_get(n); timer_setup(&n->timer, tipc_node_timeout, 0); /* Start a slow timer anyway, crypto needs it */ @@ -1128,7 +1147,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool *respond, bool *dupl_addr) { struct tipc_node *n; - struct tipc_link *l, *snd_l; + struct tipc_link *l; struct tipc_link_entry *le; bool addr_match = false; bool sign_match = false; @@ -1148,22 +1167,6 @@ void tipc_node_check_dest(struct net *net, u32 addr, return; tipc_node_write_lock(n); - if (unlikely(!n->bc_entry.link)) { - snd_l = tipc_bc_sndlink(net); - if (!tipc_link_bc_create(net, tipc_own_addr(net), - addr, peer_id, U16_MAX, - tipc_link_min_win(snd_l), - tipc_link_max_win(snd_l), - n->capabilities, - &n->bc_entry.inputq1, - &n->bc_entry.namedq, snd_l, - &n->bc_entry.link)) { - pr_warn("Broadcast rcv link creation failed, no mem\n"); - tipc_node_write_unlock_fast(n); - tipc_node_put(n); - return; - } - } le = &n->links[b->identity]; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 42283dc6c5b7..38256aabf4f1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -489,6 +489,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { + sk_free(sk); pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 3c82286e5bcc..5cb6846544cc 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -97,13 +97,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) unsigned long flags; spin_lock_irqsave(&tls_device_lock, flags); + if (unlikely(!refcount_dec_and_test(&ctx->refcount))) + goto unlock; + list_move_tail(&ctx->list, &tls_device_gc_list); /* schedule_work inside the spinlock * to make sure tls_device_down waits for that work. */ schedule_work(&tls_device_gc_work); - +unlock: spin_unlock_irqrestore(&tls_device_lock, flags); } @@ -194,8 +197,7 @@ void tls_device_sk_destruct(struct sock *sk) clean_acked_data_disable(inet_csk(sk)); } - if (refcount_dec_and_test(&tls_ctx->refcount)) - tls_device_queue_ctx_destruction(tls_ctx); + tls_device_queue_ctx_destruction(tls_ctx); } EXPORT_SYMBOL_GPL(tls_device_sk_destruct); @@ -1347,8 +1349,13 @@ static int tls_device_down(struct net_device *netdev) * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. * Now release the ref taken above. */ - if (refcount_dec_and_test(&ctx->refcount)) + if (refcount_dec_and_test(&ctx->refcount)) { + /* sk_destruct ran after tls_device_down took a ref, and + * it returned early. Complete the destruction here. + */ + list_del(&ctx->list); tls_device_free_ctx(ctx); + } } up_write(&device_offload_lock); @@ -1390,9 +1397,9 @@ static struct notifier_block tls_dev_notifier = { .notifier_call = tls_dev_event, }; -void __init tls_device_init(void) +int __init tls_device_init(void) { - register_netdevice_notifier(&tls_dev_notifier); + return register_netdevice_notifier(&tls_dev_notifier); } void __exit tls_device_cleanup(void) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 58d22d6b86ae..e537085b184f 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -905,7 +905,12 @@ static int __init tls_register(void) if (err) return err; - tls_device_init(); + err = tls_device_init(); + if (err) { + unregister_pernet_subsys(&tls_proc_ops); + return err; + } + tcp_register_ulp(&tcp_tls_ulp_ops); return 0; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fa99fe5bcf6c..2aa01afe1b03 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -438,7 +438,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) * -ECONNREFUSED. Otherwise, if we haven't queued any skbs * to other and its full, we will hang waiting for POLLOUT. */ - if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD)) + if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) return 1; if (connected) diff --git a/net/wireless/core.c b/net/wireless/core.c index 3f4554723761..3b25b78896a2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -918,9 +918,6 @@ int wiphy_register(struct wiphy *wiphy) return res; } - /* set up regulatory info */ - wiphy_regulatory_register(wiphy); - list_add_rcu(&rdev->list, &cfg80211_rdev_list); cfg80211_rdev_list_generation++; @@ -931,6 +928,9 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_debugfs_rdev_add(rdev); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); + /* set up regulatory info */ + wiphy_regulatory_register(wiphy); + if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { struct regulatory_request request; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5d8959a5e760..2bd8196ed719 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2958,6 +2958,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) { chandef->width = nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]); + if (chandef->chan->band == NL80211_BAND_S1GHZ) { + /* User input error for channel width doesn't match channel */ + if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) { + NL_SET_ERR_MSG_ATTR(extack, + attrs[NL80211_ATTR_CHANNEL_WIDTH], + "bad channel width"); + return -EINVAL; + } + } if (attrs[NL80211_ATTR_CENTER_FREQ1]) { chandef->center_freq1 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]); @@ -3479,6 +3488,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: if (wdev->ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) goto nla_put_failure_locked; @@ -11095,18 +11105,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct cfg80211_bitrate_mask mask; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; + wdev_lock(wdev); err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, dev); if (err) - return err; + goto out; - return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); +out: + wdev_unlock(wdev); + return err; } static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index a04fdfb35f07..fd848609e656 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -787,6 +787,8 @@ static int __init load_builtin_regdb_keys(void) return 0; } +MODULE_FIRMWARE("regulatory.db.p7s"); + static bool regdb_has_valid_signature(const u8 *data, unsigned int size) { const struct firmware *sig; @@ -1058,6 +1060,8 @@ static void regdb_fw_cb(const struct firmware *fw, void *context) release_firmware(fw); } +MODULE_FIRMWARE("regulatory.db"); + static int query_regdb_file(const char *alpha2) { ASSERT_RTNL(); @@ -4001,6 +4005,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy) wiphy_update_regulatory(wiphy, lr->initiator); wiphy_all_share_dfs_chan_state(wiphy); + reg_process_self_managed_hints(); } void wiphy_regulatory_deregister(struct wiphy *wiphy) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 2ef6f926610e..e63a285a9856 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -318,6 +318,7 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs) for (i = 0; i < dma_map->dma_pages_cnt; i++) { dma = &dma_map->dma_pages[i]; if (*dma) { + *dma &= ~XSK_NEXT_PG_CONTIG_MASK; dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE, DMA_BIDIRECTIONAL, attrs); *dma = 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9c5811e4ebc4..57269824a0fb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2685,8 +2685,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, *num_xfrms = 0; return 0; } - if (IS_ERR(pols[0])) + if (IS_ERR(pols[0])) { + *num_pols = 0; return PTR_ERR(pols[0]); + } *num_xfrms = pols[0]->xfrm_nr; @@ -2701,6 +2703,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, if (pols[1]) { if (IS_ERR(pols[1])) { xfrm_pols_put(pols, *num_pols); + *num_pols = 0; return PTR_ERR(pols[1]); } (*num_pols)++; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 119c34717791..9eaa0485881b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1020,7 +1020,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, if ((x->sel.family && (x->sel.family != family || !xfrm_selector_match(&x->sel, fl, family))) || - !security_xfrm_state_pol_flow_match(x, pol, fl)) + !security_xfrm_state_pol_flow_match(x, pol, + &fl->u.__fl_common)) return; if (!*best || @@ -1035,7 +1036,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, if ((!x->sel.family || (x->sel.family == family && xfrm_selector_match(&x->sel, fl, family))) && - security_xfrm_state_pol_flow_match(x, pol, fl)) + security_xfrm_state_pol_flow_match(x, pol, + &fl->u.__fl_common)) *error = -ESRCH; } } @@ -2567,7 +2569,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) int err; if (family == AF_INET && - xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) + READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)) x->props.flags |= XFRM_STATE_NOPMTUDISC; err = -EPROTONOSUPPORT; diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 89e6bf27cd9f..d620f3da086f 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -31,7 +31,7 @@ asm ( " call my_direct_func1\n" " leave\n" " .size my_tramp1, .-my_tramp1\n" -" ret\n" + ASM_RET " .type my_tramp2, @function\n" " .globl my_tramp2\n" " my_tramp2:" @@ -39,7 +39,7 @@ asm ( " movq %rsp, %rbp\n" " call my_direct_func2\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp2, .-my_tramp2\n" " .popsection\n" ); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 11b99325f3db..3927cb880d1a 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -31,7 +31,7 @@ asm ( " popq %rsi\n" " popq %rdi\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp, .-my_tramp\n" " .popsection\n" ); diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 642c50b5f716..1e901bb8d729 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -24,7 +24,7 @@ asm ( " call my_direct_func\n" " popq %rdi\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp, .-my_tramp\n" " .popsection\n" ); diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index d519ebbf0c0a..657e8488b5c4 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -243,7 +243,9 @@ objtool_args = \ $(if $(or $(CONFIG_GCOV_KERNEL),$(CONFIG_LTO_CLANG)), \ --no-unreachable,) \ $(if $(CONFIG_RETPOLINE), --retpoline,) \ + $(if $(CONFIG_RETHUNK), --rethunk,) \ $(if $(CONFIG_X86_SMAP), --uaccess,) \ + $(if $(CONFIG_SLS), --sls,) \ $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount,) # Useful for describing the dependency of composite objects diff --git a/scripts/faddr2line b/scripts/faddr2line index 6c6439f69a72..94ed98dd899f 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -44,17 +44,6 @@ set -o errexit set -o nounset -READELF="${CROSS_COMPILE:-}readelf" -ADDR2LINE="${CROSS_COMPILE:-}addr2line" -SIZE="${CROSS_COMPILE:-}size" -NM="${CROSS_COMPILE:-}nm" - -command -v awk >/dev/null 2>&1 || die "awk isn't installed" -command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" -command -v ${ADDR2LINE} >/dev/null 2>&1 || die "addr2line isn't installed" -command -v ${SIZE} >/dev/null 2>&1 || die "size isn't installed" -command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed" - usage() { echo "usage: faddr2line [--list] ..." >&2 exit 1 @@ -69,6 +58,14 @@ die() { exit 1 } +READELF="${CROSS_COMPILE:-}readelf" +ADDR2LINE="${CROSS_COMPILE:-}addr2line" +AWK="awk" + +command -v ${AWK} >/dev/null 2>&1 || die "${AWK} isn't installed" +command -v ${READELF} >/dev/null 2>&1 || die "${READELF} isn't installed" +command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed" + # Try to figure out the source directory prefix so we can remove it from the # addr2line output. HACK ALERT: This assumes that start_kernel() is in # init/main.c! This only works for vmlinux. Otherwise it falls back to @@ -76,7 +73,7 @@ die() { find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} --symbols --wide $objfile | ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) @@ -97,86 +94,156 @@ __faddr2line() { local dir_prefix=$3 local print_warnings=$4 - local func=${func_addr%+*} - local offset=${func_addr#*+} - offset=${offset%/*} - local size= - [[ $func_addr =~ "/" ]] && size=${func_addr#*/} + local sym_name=${func_addr%+*} + local func_offset=${func_addr#*+} + func_offset=${func_offset%/*} + local user_size= + local file_type + local is_vmlinux=0 + [[ $func_addr =~ "/" ]] && user_size=${func_addr#*/} - if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then + if [[ -z $sym_name ]] || [[ -z $func_offset ]] || [[ $sym_name = $func_addr ]]; then warn "bad func+offset $func_addr" DONE=1 return fi - # Go through each of the object's symbols which match the func name. - # In rare cases there might be duplicates. - file_end=$(${SIZE} -Ax $objfile | awk '$1 == ".text" {print $2}') - while read symbol; do - local fields=($symbol) - local sym_base=0x${fields[0]} - local sym_type=${fields[1]} - local sym_end=${fields[3]} + # vmlinux uses absolute addresses in the section table rather than + # section offsets. + local file_type=$(${READELF} --file-header $objfile | + ${AWK} '$1 == "Type:" { print $2; exit }') + [[ $file_type = "EXEC" ]] && is_vmlinux=1 - # calculate the size - local sym_size=$(($sym_end - $sym_base)) - if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then - warn "bad symbol size: base: $sym_base end: $sym_end" + # Go through each of the object's symbols which match the func name. + # In rare cases there might be duplicates, in which case we print all + # matches. + while read line; do + local fields=($line) + local sym_addr=0x${fields[1]} + local sym_elf_size=${fields[2]} + local sym_sec=${fields[6]} + local sec_size + local sec_name + + # Get the section size: + sec_size=$(${READELF} --section-headers --wide $objfile | + sed 's/\[ /\[/' | + ${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print "0x" $6; exit }') + + if [[ -z $sec_size ]]; then + warn "bad section size: section: $sym_sec" DONE=1 return fi + + # Get the section name: + sec_name=$(${READELF} --section-headers --wide $objfile | + sed 's/\[ /\[/' | + ${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print $2; exit }') + + if [[ -z $sec_name ]]; then + warn "bad section name: section: $sym_sec" + DONE=1 + return + fi + + # Calculate the symbol size. + # + # Unfortunately we can't use the ELF size, because kallsyms + # also includes the padding bytes in its size calculation. For + # kallsyms, the size calculation is the distance between the + # symbol and the next symbol in a sorted list. + local sym_size + local cur_sym_addr + local found=0 + while read line; do + local fields=($line) + cur_sym_addr=0x${fields[1]} + local cur_sym_elf_size=${fields[2]} + local cur_sym_name=${fields[7]:-} + + if [[ $cur_sym_addr = $sym_addr ]] && + [[ $cur_sym_elf_size = $sym_elf_size ]] && + [[ $cur_sym_name = $sym_name ]]; then + found=1 + continue + fi + + if [[ $found = 1 ]]; then + sym_size=$(($cur_sym_addr - $sym_addr)) + [[ $sym_size -lt $sym_elf_size ]] && continue; + found=2 + break + fi + done < <(${READELF} --symbols --wide $objfile | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) + + if [[ $found = 0 ]]; then + warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size" + DONE=1 + return + fi + + # If nothing was found after the symbol, assume it's the last + # symbol in the section. + [[ $found = 1 ]] && sym_size=$(($sec_size - $sym_addr)) + + if [[ -z $sym_size ]] || [[ $sym_size -le 0 ]]; then + warn "bad symbol size: sym_addr: $sym_addr cur_sym_addr: $cur_sym_addr" + DONE=1 + return + fi + sym_size=0x$(printf %x $sym_size) - # calculate the address - local addr=$(($sym_base + $offset)) + # Calculate the address from user-supplied offset: + local addr=$(($sym_addr + $func_offset)) if [[ -z $addr ]] || [[ $addr = 0 ]]; then - warn "bad address: $sym_base + $offset" + warn "bad address: $sym_addr + $func_offset" DONE=1 return fi addr=0x$(printf %x $addr) - # weed out non-function symbols - if [[ $sym_type != t ]] && [[ $sym_type != T ]]; then + # If the user provided a size, make sure it matches the symbol's size: + if [[ -n $user_size ]] && [[ $user_size -ne $sym_size ]]; then [[ $print_warnings = 1 ]] && - echo "skipping $func address at $addr due to non-function symbol of type '$sym_type'" - continue - fi - - # if the user provided a size, make sure it matches the symbol's size - if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then - [[ $print_warnings = 1 ]] && - echo "skipping $func address at $addr due to size mismatch ($size != $sym_size)" + echo "skipping $sym_name address at $addr due to size mismatch ($user_size != $sym_size)" continue; fi - # make sure the provided offset is within the symbol's range - if [[ $offset -gt $sym_size ]]; then + # Make sure the provided offset is within the symbol's range: + if [[ $func_offset -gt $sym_size ]]; then [[ $print_warnings = 1 ]] && - echo "skipping $func address at $addr due to size mismatch ($offset > $sym_size)" + echo "skipping $sym_name address at $addr due to size mismatch ($func_offset > $sym_size)" continue fi - # separate multiple entries with a blank line + # In case of duplicates or multiple addresses specified on the + # cmdline, separate multiple entries with a blank line: [[ $FIRST = 0 ]] && echo FIRST=0 - # pass real address to addr2line - echo "$func+$offset/$sym_size:" - local file_lines=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;") - [[ -z $file_lines ]] && return + echo "$sym_name+$func_offset/$sym_size:" + # Pass section address to addr2line and strip absolute paths + # from the output: + local args="--functions --pretty-print --inlines --exe=$objfile" + [[ $is_vmlinux = 0 ]] && args="$args --section=$sec_name" + local output=$(${ADDR2LINE} $args $addr | sed "s; $dir_prefix\(\./\)*; ;") + [[ -z $output ]] && continue + + # Default output (non --list): if [[ $LIST = 0 ]]; then - echo "$file_lines" | while read -r line + echo "$output" | while read -r line do echo $line done DONE=1; - return + continue fi - # show each line with context - echo "$file_lines" | while read -r line + # For --list, show each line with its corresponding source code: + echo "$output" | while read -r line do echo echo $line @@ -184,12 +251,12 @@ __faddr2line() { n1=$[$n-5] n2=$[$n+5] f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g') - awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") { if (NR=='$n') printf(">%d<", NR); else printf(" %d ", NR); printf("\t%s\n", $0)}' $f + ${AWK} 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") { if (NR=='$n') printf(">%d<", NR); else printf(" %d ", NR); printf("\t%s\n", $0)}' $f done DONE=1 - done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') + done < <(${READELF} --symbols --wide $objfile | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') } [[ $# -lt 2 ]] && usage diff --git a/scripts/gdb/linux/config.py b/scripts/gdb/linux/config.py index 90e1565b1967..8843ab3cbadd 100644 --- a/scripts/gdb/linux/config.py +++ b/scripts/gdb/linux/config.py @@ -24,9 +24,9 @@ class LxConfigDump(gdb.Command): filename = arg try: - py_config_ptr = gdb.parse_and_eval("kernel_config_data + 8") - py_config_size = gdb.parse_and_eval( - "sizeof(kernel_config_data) - 1 - 8 * 2") + py_config_ptr = gdb.parse_and_eval("&kernel_config_data") + py_config_ptr_end = gdb.parse_and_eval("&kernel_config_data_end") + py_config_size = py_config_ptr_end - py_config_ptr except gdb.error as e: raise gdb.GdbError("Can't find config, enable CONFIG_IKCONFIG?") diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 6ae0d70089b4..f6e395dcf77f 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -129,6 +129,9 @@ objtool_link() objtoolcmd="check" fi objtoolopt="${objtoolopt} --vmlinux" + if [ -n "${CONFIG_CPU_UNRET_ENTRY}" ]; then + objtoolopt="${objtoolopt} --unret" + fi if [ -z "${CONFIG_FRAME_POINTER}" ]; then objtoolopt="${objtoolopt} --no-fp" fi @@ -141,6 +144,9 @@ objtool_link() if [ -n "${CONFIG_X86_SMAP}" ]; then objtoolopt="${objtoolopt} --uaccess" fi + if [ -n "${CONFIG_SLS}" ]; then + objtoolopt="${objtoolopt} --sls" + fi info OBJTOOL ${1} tools/objtool/objtool ${objtoolcmd} ${objtoolopt} ${1} fi diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 9c8c7511d10d..0cb8de405183 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1115,7 +1115,7 @@ static const struct sectioncheck sectioncheck[] = { }, /* Do not export init/exit functions or data */ { - .fromsec = { "__ksymtab*", NULL }, + .fromsec = { "___ksymtab*", NULL }, .bad_tosec = { INIT_SECTIONS, EXIT_SECTIONS, NULL }, .mismatch = EXPORT_TO_INIT_EXIT, .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL }, @@ -1267,7 +1267,8 @@ static int secref_whitelist(const struct sectioncheck *mismatch, static inline int is_arm_mapping_symbol(const char *str) { - return str[0] == '$' && strchr("axtd", str[1]) + return str[0] == '$' && + (str[1] == 'a' || str[1] == 'd' || str[1] == 't' || str[1] == 'x') && (str[2] == '\0' || str[2] == '.'); } @@ -1978,7 +1979,7 @@ static char *remove_dot(char *s) if (n && s[n]) { size_t m = strspn(s + n + 1, "0123456789"); - if (m && (s[n + m] == '.' || s[n + m] == 0)) + if (m && (s[n + m + 1] == '.' || s[n + m + 1] == 0)) s[n] = 0; /* strip trailing .lto */ diff --git a/security/Kconfig b/security/Kconfig index 0548db16c49d..9893c316da89 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -54,17 +54,6 @@ config SECURITY_NETWORK implement socket and networking access controls. If you are unsure how to answer this question, answer N. -config PAGE_TABLE_ISOLATION - bool "Remove the kernel mapping in user mode" - default y - depends on (X86_64 || X86_PAE) && !UML - help - This feature reduces the number of hardware side channels by - ensuring that the majority of kernel addresses are not mapped - into userspace. - - See Documentation/x86/pti.rst for more details. - config SECURITY_INFINIBAND bool "Infiniband Security Hooks" depends on SECURITY && INFINIBAND diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index a6dd47eb086d..168c3b78ac47 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -73,7 +73,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) { long rc; const char *algo; - struct crypto_shash **tfm, *tmp_tfm = NULL; + struct crypto_shash **tfm, *tmp_tfm; struct shash_desc *desc; if (type == EVM_XATTR_HMAC) { @@ -118,16 +118,13 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) alloc: desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm), GFP_KERNEL); - if (!desc) { - crypto_free_shash(tmp_tfm); + if (!desc) return ERR_PTR(-ENOMEM); - } desc->tfm = *tfm; rc = crypto_shash_init(desc); if (rc) { - crypto_free_shash(tmp_tfm); kfree(desc); return ERR_PTR(rc); } diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 9e72edb8d31a..755af0b29e75 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -69,10 +69,9 @@ choice hash, defined as 20 bytes, and a null terminated pathname, limited to 255 characters. The 'ima-ng' measurement list template permits both larger hash digests and longer - pathnames. + pathnames. The configured default template can be replaced + by specifying "ima_template=" on the boot command line. - config IMA_TEMPLATE - bool "ima" config IMA_NG_TEMPLATE bool "ima-ng (default)" config IMA_SIG_TEMPLATE @@ -82,7 +81,6 @@ endchoice config IMA_DEFAULT_TEMPLATE string depends on IMA - default "ima" if IMA_TEMPLATE default "ima-ng" if IMA_NG_TEMPLATE default "ima-sig" if IMA_SIG_TEMPLATE @@ -102,19 +100,19 @@ choice config IMA_DEFAULT_HASH_SHA256 bool "SHA256" - depends on CRYPTO_SHA256=y && !IMA_TEMPLATE + depends on CRYPTO_SHA256=y config IMA_DEFAULT_HASH_SHA512 bool "SHA512" - depends on CRYPTO_SHA512=y && !IMA_TEMPLATE + depends on CRYPTO_SHA512=y config IMA_DEFAULT_HASH_WP512 bool "WP512" - depends on CRYPTO_WP512=y && !IMA_TEMPLATE + depends on CRYPTO_WP512=y config IMA_DEFAULT_HASH_SM3 bool "SM3" - depends on CRYPTO_SM3=y && !IMA_TEMPLATE + depends on CRYPTO_SM3=y endchoice config IMA_DEFAULT_HASH diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 3dd8c2e4314e..7122a359a268 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -396,7 +396,8 @@ int ima_appraise_measurement(enum ima_hooks func, goto out; } - status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint); + status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, + rc < 0 ? 0 : rc, iint); switch (status) { case INTEGRITY_PASS: case INTEGRITY_PASS_IMMUTABLE: diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index f6a7e9643b54..b1e5e7749e41 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -205,6 +205,7 @@ int __init ima_init_crypto(void) crypto_free_shash(ima_algo_array[i].tfm); } + kfree(ima_algo_array); out: crypto_free_shash(ima_shash_tfm); return rc; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index e737c216efc4..18569adcb4fe 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -1805,6 +1805,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id) if (id >= READING_MAX_ID) return false; + if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE) + && security_locked_down(LOCKDOWN_KEXEC)) + return false; + func = read_idmap[id] ?: FILE_CHECK; rcu_read_lock(); diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h index 2462bfa08fe3..cd06bd6072be 100644 --- a/security/integrity/platform_certs/keyring_handler.h +++ b/security/integrity/platform_certs/keyring_handler.h @@ -30,3 +30,11 @@ efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type); efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type); #endif + +#ifndef UEFI_QUIRK_SKIP_CERT +#define UEFI_QUIRK_SKIP_CERT(vendor, product) \ + .matches = { \ + DMI_MATCH(DMI_BOARD_VENDOR, vendor), \ + DMI_MATCH(DMI_PRODUCT_NAME, product), \ + }, +#endif diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index f290f78c3f30..555d2dfc0ff7 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -11,6 +12,31 @@ #include "../integrity.h" #include "keyring_handler.h" +/* + * On T2 Macs reading the db and dbx efi variables to load UEFI Secure Boot + * certificates causes occurrence of a page fault in Apple's firmware and + * a crash disabling EFI runtime services. The following quirk skips reading + * these variables. + */ +static const struct dmi_system_id uefi_skip_cert[] = { + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,2") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,3") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,4") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,2") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,3") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,4") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,2") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir9,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacMini8,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacPro7,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,2") }, + { } +}; + /* * Look to see if a UEFI variable called MokIgnoreDB exists and return true if * it does. @@ -137,6 +163,13 @@ static int __init load_uefi_certs(void) unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0; efi_status_t status; int rc = 0; + const struct dmi_system_id *dmi_id; + + dmi_id = dmi_first_match(uefi_skip_cert); + if (dmi_id) { + pr_err("Reading UEFI Secure Boot Certs is not supported on T2 Macs.\n"); + return false; + } if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) return false; diff --git a/security/security.c b/security/security.c index baa63edb53e2..615c42becb3e 100644 --- a/security/security.c +++ b/security/security.c @@ -2195,15 +2195,16 @@ void security_sk_clone(const struct sock *sk, struct sock *newsk) } EXPORT_SYMBOL(security_sk_clone); -void security_sk_classify_flow(struct sock *sk, struct flowi *fl) +void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic) { - call_void_hook(sk_getsecid, sk, &fl->flowi_secid); + call_void_hook(sk_getsecid, sk, &flic->flowic_secid); } EXPORT_SYMBOL(security_sk_classify_flow); -void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic) { - call_void_hook(req_classify_flow, req, fl); + call_void_hook(req_classify_flow, req, flic); } EXPORT_SYMBOL(security_req_classify_flow); @@ -2395,7 +2396,7 @@ int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl) + const struct flowi_common *flic) { struct security_hook_list *hp; int rc = LSM_RET_DEFAULT(xfrm_state_pol_flow_match); @@ -2411,7 +2412,7 @@ int security_xfrm_state_pol_flow_match(struct xfrm_state *x, */ hlist_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match, list) { - rc = hp->hook.xfrm_state_pol_flow_match(x, xp, fl); + rc = hp->hook.xfrm_state_pol_flow_match(x, xp, flic); break; } return rc; @@ -2422,9 +2423,9 @@ int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) return call_int_hook(xfrm_decode_session, 0, skb, secid, 1); } -void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +void security_skb_classify_flow(struct sk_buff *skb, struct flowi_common *flic) { - int rc = call_int_hook(xfrm_decode_session, 0, skb, &fl->flowi_secid, + int rc = call_int_hook(xfrm_decode_session, 0, skb, &flic->flowic_secid, 0); BUG_ON(rc); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3024b84e6871..1235854233c9 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5507,9 +5507,9 @@ static void selinux_secmark_refcount_dec(void) } static void selinux_req_classify_flow(const struct request_sock *req, - struct flowi *fl) + struct flowi_common *flic) { - fl->flowi_secid = req->secid; + flic->flowic_secid = req->secid; } static int selinux_tun_dev_alloc_security(void **security) diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index a0b465316292..0a6f34a7a971 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -26,7 +26,7 @@ int selinux_xfrm_state_delete(struct xfrm_state *x); int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl); + const struct flowi_common *flic); #ifdef CONFIG_SECURITY_NETWORK_XFRM extern atomic_t selinux_xfrm_refcount; diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index 7335f67ce54e..e8960a59586c 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -178,7 +178,8 @@ int hashtab_duplicate(struct hashtab *new, struct hashtab *orig, kmem_cache_free(hashtab_node_cachep, cur); } } - kmem_cache_free(hashtab_node_cachep, new); + kfree(new->htable); + memset(new, 0, sizeof(*new)); return -ENOMEM; } diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 00e95f8bd7c7..114245b6f7c7 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -175,9 +175,10 @@ int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) */ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl) + const struct flowi_common *flic) { u32 state_sid; + u32 flic_sid; if (!xp->security) if (x->security) @@ -196,17 +197,17 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, return 0; state_sid = x->security->ctx_sid; + flic_sid = flic->flowic_secid; - if (fl->flowi_secid != state_sid) + if (flic_sid != state_sid) return 0; /* We don't need a separate SA Vs. policy polmatch check since the SA * is now of the same label as the flow and a flow Vs. policy polmatch * check had already happened in selinux_xfrm_policy_lookup() above. */ - return (avc_has_perm(&selinux_state, - fl->flowi_secid, state_sid, - SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, - NULL) ? 0 : 1); + return (avc_has_perm(&selinux_state, flic_sid, state_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, + NULL) ? 0 : 1); } static u32 selinux_xfrm_skb_sid_egress(struct sk_buff *skb) diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index a9a0d74f3165..191883842a35 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -434,7 +434,6 @@ EXPORT_SYMBOL(snd_pcm_lib_malloc_pages); */ int snd_pcm_lib_free_pages(struct snd_pcm_substream *substream) { - struct snd_card *card = substream->pcm->card; struct snd_pcm_runtime *runtime; if (PCM_RUNTIME_CHECK(substream)) @@ -443,6 +442,8 @@ int snd_pcm_lib_free_pages(struct snd_pcm_substream *substream) if (runtime->dma_area == NULL) return 0; if (runtime->dma_buffer_p != &substream->dma_buffer) { + struct snd_card *card = substream->pcm->card; + /* it's a newly allocated buffer. release it now. */ do_free_pages(card, runtime->dma_buffer_p); kfree(runtime->dma_buffer_p); diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c index 3e9e9ac804f6..b7e5032b61c9 100644 --- a/sound/hda/hdac_device.c +++ b/sound/hda/hdac_device.c @@ -660,6 +660,7 @@ static const struct hda_vendor_id hda_vendor_ids[] = { { 0x14f1, "Conexant" }, { 0x17e8, "Chrontel" }, { 0x1854, "LG" }, + { 0x19e5, "Huawei" }, { 0x1aec, "Wolfson Microelectronics" }, { 0x1af4, "QEMU" }, { 0x434d, "C-Media" }, diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index d6420d224d09..09b368761cc0 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -1088,7 +1088,8 @@ wavefront_send_sample (snd_wavefront_t *dev, if (dataptr < data_end) { - __get_user (sample_short, dataptr); + if (get_user(sample_short, dataptr)) + return -EFAULT; dataptr += skip; if (data_is_unsigned) { /* GUS ? */ diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c index f8ac96cf38a4..06775519dab0 100644 --- a/sound/pci/ctxfi/ctatc.c +++ b/sound/pci/ctxfi/ctatc.c @@ -36,6 +36,7 @@ | ((IEC958_AES3_CON_FS_48000) << 24)) static const struct snd_pci_quirk subsys_20k1_list[] = { + SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0021, "SB046x", CTSB046X), SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0022, "SB055x", CTSB055X), SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x002f, "SB055x", CTSB055X), SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, 0x0029, "SB073x", CTSB073X), @@ -64,6 +65,7 @@ static const struct snd_pci_quirk subsys_20k2_list[] = { static const char *ct_subsys_name[NUM_CTCARDS] = { /* 20k1 models */ + [CTSB046X] = "SB046x", [CTSB055X] = "SB055x", [CTSB073X] = "SB073x", [CTUAA] = "UAA", diff --git a/sound/pci/ctxfi/cthardware.h b/sound/pci/ctxfi/cthardware.h index 9e6b83bd432d..b50d61a08e28 100644 --- a/sound/pci/ctxfi/cthardware.h +++ b/sound/pci/ctxfi/cthardware.h @@ -26,8 +26,9 @@ enum CHIPTYP { enum CTCARDS { /* 20k1 models */ + CTSB046X, + CT20K1_MODEL_FIRST = CTSB046X, CTSB055X, - CT20K1_MODEL_FIRST = CTSB055X, CTSB073X, CTUAA, CT20K1_UNKNOWN, diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 4dc01647753c..ec821a263036 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -823,7 +823,7 @@ static void set_pin_targets(struct hda_codec *codec, snd_hda_set_pin_ctl_cache(codec, cfg->nid, cfg->val); } -static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) +void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth) { const char *modelname = codec->fixup_name; @@ -833,7 +833,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) if (++depth > 10) break; if (fix->chained_before) - apply_fixup(codec, fix->chain_id, action, depth + 1); + __snd_hda_apply_fixup(codec, fix->chain_id, action, depth + 1); switch (fix->type) { case HDA_FIXUP_PINS: @@ -874,6 +874,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) id = fix->chain_id; } } +EXPORT_SYMBOL_GPL(__snd_hda_apply_fixup); /** * snd_hda_apply_fixup - Apply the fixup chain with the given action @@ -883,7 +884,7 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) void snd_hda_apply_fixup(struct hda_codec *codec, int action) { if (codec->fixup_list) - apply_fixup(codec, codec->fixup_id, action, 0); + __snd_hda_apply_fixup(codec, codec->fixup_id, action, 0); } EXPORT_SYMBOL_GPL(snd_hda_apply_fixup); diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 5beb8aa44ecd..efc0c68a5442 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -357,6 +357,7 @@ void snd_hda_apply_verbs(struct hda_codec *codec); void snd_hda_apply_pincfgs(struct hda_codec *codec, const struct hda_pintbl *cfg); void snd_hda_apply_fixup(struct hda_codec *codec, int action); +void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth); void snd_hda_pick_fixup(struct hda_codec *codec, const struct hda_model_fixup *models, const struct snd_pci_quirk *quirk, diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 8098088b0056..6b5d7b4760ed 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -937,6 +937,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x82b4, "HP ProDesk 600 G3", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK), @@ -1045,6 +1046,13 @@ static int patch_conexant_auto(struct hda_codec *codec) snd_hda_pick_fixup(codec, cxt5051_fixup_models, cxt5051_fixups, cxt_fixups); break; + case 0x14f15098: + codec->pin_amp_workaround = 1; + spec->gen.mixer_nid = 0x22; + spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO; + snd_hda_pick_fixup(codec, cxt5066_fixup_models, + cxt5066_fixups, cxt_fixups); + break; case 0x14f150f2: codec->power_save_node = 1; fallthrough; @@ -1065,11 +1073,11 @@ static int patch_conexant_auto(struct hda_codec *codec) if (err < 0) goto error; - err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); + err = cx_auto_parse_beep(codec); if (err < 0) goto error; - err = cx_auto_parse_beep(codec); + err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); if (err < 0) goto error; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a5b1fd62a99c..615526126408 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -439,6 +439,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0245: case 0x10ec0255: case 0x10ec0256: + case 0x19e58326: case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: @@ -576,6 +577,7 @@ static void alc_shutup_pins(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: case 0x10ec0283: case 0x10ec0286: case 0x10ec0288: @@ -1990,6 +1992,7 @@ enum { ALC1220_FIXUP_CLEVO_PB51ED_PINS, ALC887_FIXUP_ASUS_AUDIO, ALC887_FIXUP_ASUS_HMIC, + ALCS1200A_FIXUP_MIC_VREF, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -2535,6 +2538,14 @@ static const struct hda_fixup alc882_fixups[] = { .chained = true, .chain_id = ALC887_FIXUP_ASUS_AUDIO, }, + [ALCS1200A_FIXUP_MIC_VREF] = { + .type = HDA_FIXUP_PINCTLS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, PIN_VREF50 }, /* rear mic */ + { 0x19, PIN_VREF50 }, /* front mic */ + {} + } + }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -2572,6 +2583,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x835f, "Asus Eee 1601", ALC888_FIXUP_EEE1601), SND_PCI_QUIRK(0x1043, 0x84bc, "ASUS ET2700", ALC887_FIXUP_ASUS_BASS), SND_PCI_QUIRK(0x1043, 0x8691, "ASUS ROG Ranger VIII", ALC882_FIXUP_GPIO3), + SND_PCI_QUIRK(0x1043, 0x8797, "ASUS TUF B550M-PLUS", ALCS1200A_FIXUP_MIC_VREF), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9044, "Sony VAIO AiO", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), @@ -2631,6 +2643,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67f1, "Clevo PC70H[PRS]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67f5, "Clevo PD70PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170SM", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x7715, "Clevo X170KM-G", ALC1220_FIXUP_CLEVO_PB51ED), @@ -3242,6 +3255,7 @@ static void alc_disable_headset_jack_key(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x48, 0x0); alc_update_coef_idx(codec, 0x49, 0x0045, 0x0); break; @@ -3270,6 +3284,7 @@ static void alc_enable_headset_jack_key(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x48, 0xd011); alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045); break; @@ -4839,6 +4854,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_process_coef_fw(codec, coef0256); break; case 0x10ec0234: @@ -4954,6 +4970,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x45, 0xc489); snd_hda_set_pin_ctl_cache(codec, hp_pin, 0); alc_process_coef_fw(codec, coef0256); @@ -5104,6 +5121,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x1b, 0x0e4b); alc_write_coef_idx(codec, 0x45, 0xc089); msleep(50); @@ -5203,6 +5221,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_process_coef_fw(codec, coef0256); break; case 0x10ec0234: @@ -5317,6 +5336,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_process_coef_fw(codec, coef0256); break; case 0x10ec0234: @@ -5418,6 +5438,7 @@ static void alc_determine_headset_type(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_write_coef_idx(codec, 0x1b, 0x0e4b); alc_write_coef_idx(codec, 0x06, 0x6104); alc_write_coefex_idx(codec, 0x57, 0x3, 0x09a3); @@ -5712,6 +5733,7 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: alc_process_coef_fw(codec, alc256fw); break; } @@ -6315,6 +6337,7 @@ static void alc_combo_jack_hp_jd_restart(struct hda_codec *codec) case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: + case 0x19e58326: alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); break; @@ -6702,6 +6725,7 @@ enum { ALC298_FIXUP_LENOVO_SPK_VOLUME, ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, ALC269_FIXUP_ATIV_BOOK_8, + ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE, ALC221_FIXUP_HP_MIC_NO_PRESENCE, ALC256_FIXUP_ASUS_HEADSET_MODE, ALC256_FIXUP_ASUS_MIC, @@ -6805,6 +6829,7 @@ enum { ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS, ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE, ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, + ALC298_FIXUP_LENOVO_C940_DUET7, ALC287_FIXUP_13S_GEN2_SPEAKERS, ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, @@ -6814,6 +6839,23 @@ enum { ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, }; +/* A special fixup for Lenovo C940 and Yoga Duet 7; + * both have the very same PCI SSID, and we need to apply different fixups + * depending on the codec ID + */ +static void alc298_fixup_lenovo_c940_duet7(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.vendor_id == 0x10ec0298) + id = ALC298_FIXUP_LENOVO_SPK_VOLUME; /* C940 */ + else + id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* Duet 7 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_GPIO2] = { .type = HDA_FIXUP_FUNC, @@ -7610,6 +7652,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_NO_SHUTUP }, + [ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x01813030 }, /* use as headphone mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC221_FIXUP_HP_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -8507,6 +8559,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE, }, + [ALC298_FIXUP_LENOVO_C940_DUET7] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc298_fixup_lenovo_c940_duet7, + }, [ALC287_FIXUP_13S_GEN2_SPEAKERS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -8588,6 +8644,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x129d, "Acer SWIFT SF313-51", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), @@ -8597,6 +8654,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), + SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X), @@ -8651,6 +8709,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0a9d, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0b19, "Dell XPS 15 9520", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -8710,6 +8769,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2b5e, "HP 288 Pro G2 MT", ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC), @@ -8745,6 +8805,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), @@ -8770,6 +8831,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -8885,6 +8947,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x70f3, "Clevo NH77DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x7716, "Clevo NS50PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x7718, "Clevo L140PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -8966,8 +9030,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), + SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), - SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), + SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), @@ -9011,9 +9076,18 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), + SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1111, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1119, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1129, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), @@ -9793,6 +9867,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x19e58326: spec->codec_variant = ALC269_TYPE_ALC256; spec->shutup = alc256_shutup; spec->init_hook = alc256_init; @@ -10412,6 +10487,7 @@ enum { ALC668_FIXUP_MIC_DET_COEF, ALC897_FIXUP_LENOVO_HEADSET_MIC, ALC897_FIXUP_HEADSET_MIC_PIN, + ALC897_FIXUP_HP_HSMIC_VERB, }; static const struct hda_fixup alc662_fixups[] = { @@ -10831,6 +10907,13 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC897_FIXUP_LENOVO_HEADSET_MIC }, + [ALC897_FIXUP_HP_HSMIC_VERB] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -10856,7 +10939,9 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), + SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB), SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2), + SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), @@ -10876,6 +10961,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), + SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN), @@ -11234,6 +11320,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0b00, "ALCS1200A", patch_alc882), HDA_CODEC_ENTRY(0x10ec1168, "ALC1220", patch_alc882), HDA_CODEC_ENTRY(0x10ec1220, "ALC1220", patch_alc882), + HDA_CODEC_ENTRY(0x19e58326, "HW8326", patch_alc269), {} /* terminator */ }; MODULE_DEVICE_TABLE(hdaudio, snd_hda_id_realtek); diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 773a136161f1..a188901a83bb 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -520,11 +520,11 @@ static int via_parse_auto_config(struct hda_codec *codec) if (err < 0) return err; - err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); + err = auto_parse_beep(codec); if (err < 0) return err; - err = auto_parse_beep(codec); + err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg); if (err < 0) return err; diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c index b1a28a9382fb..f91a0e728ed5 100644 --- a/sound/soc/atmel/atmel-classd.c +++ b/sound/soc/atmel/atmel-classd.c @@ -458,7 +458,6 @@ static const struct snd_soc_component_driver atmel_classd_cpu_dai_component = { .num_controls = ARRAY_SIZE(atmel_classd_snd_controls), .idle_bias_on = 1, .use_pmdown_time = 1, - .endianness = 1, }; /* ASoC sound card */ diff --git a/sound/soc/atmel/atmel-pdmic.c b/sound/soc/atmel/atmel-pdmic.c index 8e1d8230b180..049383e5405e 100644 --- a/sound/soc/atmel/atmel-pdmic.c +++ b/sound/soc/atmel/atmel-pdmic.c @@ -481,7 +481,6 @@ static const struct snd_soc_component_driver atmel_pdmic_cpu_dai_component = { .num_controls = ARRAY_SIZE(atmel_pdmic_snd_controls), .idle_bias_on = 1, .use_pmdown_time = 1, - .endianness = 1, }; /* ASoC sound card */ diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 52c89a6f54e9..25f331551f68 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -857,7 +857,6 @@ config SND_SOC_MAX98095 config SND_SOC_MAX98357A tristate "Maxim MAX98357A CODEC" - depends on GPIOLIB config SND_SOC_MAX98371 tristate @@ -1099,7 +1098,6 @@ config SND_SOC_RT1015 config SND_SOC_RT1015P tristate - depends on GPIOLIB config SND_SOC_RT1305 tristate diff --git a/sound/soc/codecs/cs35l36.c b/sound/soc/codecs/cs35l36.c index e9b5f76f27a8..aa32b8c26578 100644 --- a/sound/soc/codecs/cs35l36.c +++ b/sound/soc/codecs/cs35l36.c @@ -444,7 +444,8 @@ static bool cs35l36_volatile_reg(struct device *dev, unsigned int reg) } } -static DECLARE_TLV_DB_SCALE(dig_vol_tlv, -10200, 25, 0); +static const DECLARE_TLV_DB_RANGE(dig_vol_tlv, 0, 912, + TLV_DB_MINMAX_ITEM(-10200, 1200)); static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1); static const char * const cs35l36_pcm_sftramp_text[] = { diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index c61b17dc2af8..fc6a2bc311b4 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -146,7 +146,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { 0, 0xA0, 96, adc_att_tlv), SOC_DOUBLE_R_SX_TLV("PGA Volume", CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL, - 0, 0x1A, 30, pga_tlv), + 0, 0x19, 30, pga_tlv), SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0), SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0), diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c index f772628f233e..38223641bdf6 100644 --- a/sound/soc/codecs/cs42l52.c +++ b/sound/soc/codecs/cs42l52.c @@ -137,7 +137,9 @@ static DECLARE_TLV_DB_SCALE(mic_tlv, 1600, 100, 0); static DECLARE_TLV_DB_SCALE(pga_tlv, -600, 50, 0); -static DECLARE_TLV_DB_SCALE(mix_tlv, -50, 50, 0); +static DECLARE_TLV_DB_SCALE(pass_tlv, -6000, 50, 0); + +static DECLARE_TLV_DB_SCALE(mix_tlv, -5150, 50, 0); static DECLARE_TLV_DB_SCALE(beep_tlv, -56, 200, 0); @@ -351,7 +353,7 @@ static const struct snd_kcontrol_new cs42l52_snd_controls[] = { CS42L52_SPKB_VOL, 0, 0x40, 0xC0, hl_tlv), SOC_DOUBLE_R_SX_TLV("Bypass Volume", CS42L52_PASSTHRUA_VOL, - CS42L52_PASSTHRUB_VOL, 0, 0x88, 0x90, pga_tlv), + CS42L52_PASSTHRUB_VOL, 0, 0x88, 0x90, pass_tlv), SOC_DOUBLE("Bypass Mute", CS42L52_MISC_CTL, 4, 5, 1, 0), @@ -364,7 +366,7 @@ static const struct snd_kcontrol_new cs42l52_snd_controls[] = { CS42L52_ADCB_VOL, 0, 0xA0, 0x78, ipd_tlv), SOC_DOUBLE_R_SX_TLV("ADC Mixer Volume", CS42L52_ADCA_MIXER_VOL, CS42L52_ADCB_MIXER_VOL, - 0, 0x19, 0x7F, ipd_tlv), + 0, 0x19, 0x7F, mix_tlv), SOC_DOUBLE("ADC Switch", CS42L52_ADC_MISC_CTL, 0, 1, 1, 0), diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c index 06dcfae9dfe7..d41e03193106 100644 --- a/sound/soc/codecs/cs42l56.c +++ b/sound/soc/codecs/cs42l56.c @@ -391,9 +391,9 @@ static const struct snd_kcontrol_new cs42l56_snd_controls[] = { SOC_DOUBLE("ADC Boost Switch", CS42L56_GAIN_BIAS_CTL, 3, 2, 1, 1), SOC_DOUBLE_R_SX_TLV("Headphone Volume", CS42L56_HPA_VOLUME, - CS42L56_HPB_VOLUME, 0, 0x84, 0x48, hl_tlv), + CS42L56_HPB_VOLUME, 0, 0x44, 0x48, hl_tlv), SOC_DOUBLE_R_SX_TLV("LineOut Volume", CS42L56_LOA_VOLUME, - CS42L56_LOB_VOLUME, 0, 0x84, 0x48, hl_tlv), + CS42L56_LOB_VOLUME, 0, 0x44, 0x48, hl_tlv), SOC_SINGLE_TLV("Bass Shelving Volume", CS42L56_TONE_CTL, 0, 0x00, 1, tone_tlv), diff --git a/sound/soc/codecs/cs47l15.c b/sound/soc/codecs/cs47l15.c index 254f9d96e766..7c20642f160a 100644 --- a/sound/soc/codecs/cs47l15.c +++ b/sound/soc/codecs/cs47l15.c @@ -122,6 +122,9 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol, snd_soc_kcontrol_component(kcontrol); struct cs47l15 *cs47l15 = snd_soc_component_get_drvdata(component); + if (!!ucontrol->value.integer.value[0] == cs47l15->in1_lp_mode) + return 0; + switch (ucontrol->value.integer.value[0]) { case 0: /* Set IN1 to normal mode */ @@ -150,7 +153,7 @@ static int cs47l15_in1_adc_put(struct snd_kcontrol *kcontrol, break; } - return 0; + return 1; } static const struct snd_kcontrol_new cs47l15_snd_controls[] = { diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c index ed22361b35c1..a5a383b92305 100644 --- a/sound/soc/codecs/cs53l30.c +++ b/sound/soc/codecs/cs53l30.c @@ -347,22 +347,22 @@ static const struct snd_kcontrol_new cs53l30_snd_controls[] = { SOC_ENUM("ADC2 NG Delay", adc2_ng_delay_enum), SOC_SINGLE_SX_TLV("ADC1A PGA Volume", - CS53L30_ADC1A_AFE_CTL, 0, 0x34, 0x18, pga_tlv), + CS53L30_ADC1A_AFE_CTL, 0, 0x34, 0x24, pga_tlv), SOC_SINGLE_SX_TLV("ADC1B PGA Volume", - CS53L30_ADC1B_AFE_CTL, 0, 0x34, 0x18, pga_tlv), + CS53L30_ADC1B_AFE_CTL, 0, 0x34, 0x24, pga_tlv), SOC_SINGLE_SX_TLV("ADC2A PGA Volume", - CS53L30_ADC2A_AFE_CTL, 0, 0x34, 0x18, pga_tlv), + CS53L30_ADC2A_AFE_CTL, 0, 0x34, 0x24, pga_tlv), SOC_SINGLE_SX_TLV("ADC2B PGA Volume", - CS53L30_ADC2B_AFE_CTL, 0, 0x34, 0x18, pga_tlv), + CS53L30_ADC2B_AFE_CTL, 0, 0x34, 0x24, pga_tlv), SOC_SINGLE_SX_TLV("ADC1A Digital Volume", - CS53L30_ADC1A_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv), + CS53L30_ADC1A_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv), SOC_SINGLE_SX_TLV("ADC1B Digital Volume", - CS53L30_ADC1B_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv), + CS53L30_ADC1B_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv), SOC_SINGLE_SX_TLV("ADC2A Digital Volume", - CS53L30_ADC2A_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv), + CS53L30_ADC2A_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv), SOC_SINGLE_SX_TLV("ADC2B Digital Volume", - CS53L30_ADC2B_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv), + CS53L30_ADC2B_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv), }; static const struct snd_soc_dapm_widget cs53l30_dapm_widgets[] = { diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index 7e26231a596a..081b5f189632 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -161,13 +161,16 @@ static int es8328_put_deemph(struct snd_kcontrol *kcontrol, if (deemph > 1) return -EINVAL; + if (es8328->deemph == deemph) + return 0; + ret = es8328_set_deemph(component); if (ret < 0) return ret; es8328->deemph = deemph; - return 0; + return 1; } diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index 680f31a6493a..bbab4bc1f6b5 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -618,7 +618,13 @@ int madera_out1_demux_put(struct snd_kcontrol *kcontrol, end: snd_soc_dapm_mutex_unlock(dapm); - return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + if (ret < 0) { + dev_err(madera->dev, "Failed to update demux power state: %d\n", ret); + return ret; + } + + return change; } EXPORT_SYMBOL_GPL(madera_out1_demux_put); @@ -893,7 +899,7 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; const int adsp_num = e->shift_l; const unsigned int item = ucontrol->value.enumerated.item[0]; - int ret; + int ret = 0; if (item >= e->items) return -EINVAL; @@ -910,10 +916,10 @@ static int madera_adsp_rate_put(struct snd_kcontrol *kcontrol, "Cannot change '%s' while in use by active audio paths\n", kcontrol->id.name); ret = -EBUSY; - } else { + } else if (priv->adsp_rate_cache[adsp_num] != e->values[item]) { /* Volatile register so defer until the codec is powered up */ priv->adsp_rate_cache[adsp_num] = e->values[item]; - ret = 0; + ret = 1; } mutex_unlock(&priv->rate_lock); diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 5b6405392f08..0c73979cad4a 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -393,7 +393,8 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; unsigned int mask = (1 << fls(mc->max)) - 1; - unsigned int sel = ucontrol->value.integer.value[0]; + int sel_unchecked = ucontrol->value.integer.value[0]; + unsigned int sel; unsigned int val = snd_soc_component_read(component, mc->reg); unsigned int *select; @@ -413,8 +414,9 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, val = (val >> mc->shift) & mask; - if (sel < 0 || sel > mc->max) + if (sel_unchecked < 0 || sel_unchecked > mc->max) return -EINVAL; + sel = sel_unchecked; *select = sel; diff --git a/sound/soc/codecs/nau8822.c b/sound/soc/codecs/nau8822.c index 609aeeb27818..d831959d8ff7 100644 --- a/sound/soc/codecs/nau8822.c +++ b/sound/soc/codecs/nau8822.c @@ -740,6 +740,8 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source, pll_param->pll_int, pll_param->pll_frac, pll_param->mclk_scaler, pll_param->pre_factor); + snd_soc_component_update_bits(component, + NAU8822_REG_POWER_MANAGEMENT_1, NAU8822_PLL_EN_MASK, NAU8822_PLL_OFF); snd_soc_component_update_bits(component, NAU8822_REG_PLL_N, NAU8822_PLLMCLK_DIV2 | NAU8822_PLLN_MASK, (pll_param->pre_factor ? NAU8822_PLLMCLK_DIV2 : 0) | @@ -757,6 +759,8 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source, pll_param->mclk_scaler << NAU8822_MCLKSEL_SFT); snd_soc_component_update_bits(component, NAU8822_REG_CLOCKING, NAU8822_CLKM_MASK, NAU8822_CLKM_PLL); + snd_soc_component_update_bits(component, + NAU8822_REG_POWER_MANAGEMENT_1, NAU8822_PLL_EN_MASK, NAU8822_PLL_ON); return 0; } diff --git a/sound/soc/codecs/nau8822.h b/sound/soc/codecs/nau8822.h index 489191ff187e..b45d42c15de6 100644 --- a/sound/soc/codecs/nau8822.h +++ b/sound/soc/codecs/nau8822.h @@ -90,6 +90,9 @@ #define NAU8822_REFIMP_3K 0x3 #define NAU8822_IOBUF_EN (0x1 << 2) #define NAU8822_ABIAS_EN (0x1 << 3) +#define NAU8822_PLL_EN_MASK (0x1 << 5) +#define NAU8822_PLL_ON (0x1 << 5) +#define NAU8822_PLL_OFF (0x0 << 5) /* NAU8822_REG_AUDIO_INTERFACE (0x4) */ #define NAU8822_AIFMT_MASK (0x3 << 3) diff --git a/sound/soc/codecs/rk3328_codec.c b/sound/soc/codecs/rk3328_codec.c index aed18cbb9f68..e40b97929f6c 100644 --- a/sound/soc/codecs/rk3328_codec.c +++ b/sound/soc/codecs/rk3328_codec.c @@ -481,7 +481,7 @@ static int rk3328_platform_probe(struct platform_device *pdev) ret = clk_prepare_enable(rk3328->pclk); if (ret < 0) { dev_err(&pdev->dev, "failed to enable acodec pclk\n"); - return ret; + goto err_unprepare_mclk; } base = devm_platform_ioremap_resource(pdev, 0); diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 7081142a355e..c444a56df95b 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -419,7 +419,7 @@ static int rt5514_dsp_voice_wake_up_put(struct snd_kcontrol *kcontrol, } } - return 0; + return 1; } static const struct snd_kcontrol_new rt5514_snd_controls[] = { diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 420003d062c7..d1533e95a74f 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -4095,9 +4095,14 @@ static int rt5645_i2c_remove(struct i2c_client *i2c) if (i2c->irq) free_irq(i2c->irq, rt5645); + /* + * Since the rt5645_btn_check_callback() can queue jack_detect_work, + * the timer need to be delted first + */ + del_timer_sync(&rt5645->btn_check_timer); + cancel_delayed_work_sync(&rt5645->jack_detect_work); cancel_delayed_work_sync(&rt5645->rcclock_work); - del_timer_sync(&rt5645->btn_check_timer); regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies); diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 4c0e87e22b97..f066e016a874 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1797,6 +1797,9 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); + clk_disable_unprepare(sgtl5000->mclk); regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies); regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies); @@ -1804,6 +1807,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) return 0; } +static void sgtl5000_i2c_shutdown(struct i2c_client *client) +{ + sgtl5000_i2c_remove(client); +} + static const struct i2c_device_id sgtl5000_id[] = { {"sgtl5000", 0}, {}, @@ -1824,6 +1832,7 @@ static struct i2c_driver sgtl5000_i2c_driver = { }, .probe = sgtl5000_i2c_probe, .remove = sgtl5000_i2c_remove, + .shutdown = sgtl5000_i2c_shutdown, .id_table = sgtl5000_id, }; diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 56ec5863f250..3a808c762299 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -80,6 +80,7 @@ /* * SGTL5000_CHIP_DIG_POWER */ +#define SGTL5000_DIG_POWER_DEFAULT 0x0000 #define SGTL5000_ADC_EN 0x0040 #define SGTL5000_DAC_EN 0x0020 #define SGTL5000_DAP_POWERUP 0x0010 diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 14a193e48dc7..37588804a6b5 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -42,10 +42,12 @@ static void tas2764_reset(struct tas2764_priv *tas2764) gpiod_set_value_cansleep(tas2764->reset_gpio, 0); msleep(20); gpiod_set_value_cansleep(tas2764->reset_gpio, 1); + usleep_range(1000, 2000); } snd_soc_component_write(tas2764->component, TAS2764_SW_RST, TAS2764_RST); + usleep_range(1000, 2000); } static int tas2764_set_bias_level(struct snd_soc_component *component, @@ -107,8 +109,10 @@ static int tas2764_codec_resume(struct snd_soc_component *component) struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); int ret; - if (tas2764->sdz_gpio) + if (tas2764->sdz_gpio) { gpiod_set_value_cansleep(tas2764->sdz_gpio, 1); + usleep_range(1000, 2000); + } ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, TAS2764_PWR_CTRL_MASK, @@ -131,7 +135,8 @@ static const char * const tas2764_ASI1_src[] = { }; static SOC_ENUM_SINGLE_DECL( - tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, 4, tas2764_ASI1_src); + tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, TAS2764_TDM_CFG2_SCFG_SHIFT, + tas2764_ASI1_src); static const struct snd_kcontrol_new tas2764_asi1_mux = SOC_DAPM_ENUM("ASI1 Source", tas2764_ASI1_src_enum); @@ -329,20 +334,22 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct snd_soc_component *component = dai->component; struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); - u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0; - int iface; + u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0; int ret; switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_IF: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_NB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING; break; + case SND_SOC_DAIFMT_IB_IF: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_IB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING; break; - default: - dev_err(tas2764->dev, "ASI format Inverse is not found\n"); - return -EINVAL; } ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, @@ -353,13 +360,13 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_DSP_A: - iface = TAS2764_TDM_CFG2_SCFG_I2S; tdm_rx_start_slot = 1; break; case SND_SOC_DAIFMT_DSP_B: case SND_SOC_DAIFMT_LEFT_J: - iface = TAS2764_TDM_CFG2_SCFG_LEFT_J; tdm_rx_start_slot = 0; break; default: @@ -368,14 +375,15 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, - TAS2764_TDM_CFG1_MASK, - (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT)); + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG0, + TAS2764_TDM_CFG0_FRAME_START, + asi_cfg_0); if (ret < 0) return ret; - ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG2, - TAS2764_TDM_CFG2_SCFG_MASK, iface); + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, + TAS2764_TDM_CFG1_MASK, + (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT)); if (ret < 0) return ret; @@ -501,8 +509,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component) tas2764->component = component; - if (tas2764->sdz_gpio) + if (tas2764->sdz_gpio) { gpiod_set_value_cansleep(tas2764->sdz_gpio, 1); + usleep_range(1000, 2000); + } tas2764_reset(tas2764); @@ -526,12 +536,12 @@ static int tas2764_codec_probe(struct snd_soc_component *component) } static DECLARE_TLV_DB_SCALE(tas2764_digital_tlv, 1100, 50, 0); -static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10000, 50, 0); +static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1); static const struct snd_kcontrol_new tas2764_snd_controls[] = { SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0, TAS2764_DVC_MAX, 1, tas2764_playback_volume), - SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 0, 0x14, 0, + SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 1, 0x14, 0, tas2764_digital_tlv), }; @@ -556,7 +566,7 @@ static const struct reg_default tas2764_reg_defaults[] = { { TAS2764_SW_RST, 0x00 }, { TAS2764_PWR_CTRL, 0x1a }, { TAS2764_DVC, 0x00 }, - { TAS2764_CHNL_0, 0x00 }, + { TAS2764_CHNL_0, 0x28 }, { TAS2764_TDM_CFG0, 0x09 }, { TAS2764_TDM_CFG1, 0x02 }, { TAS2764_TDM_CFG2, 0x0a }, diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h index 67d6fd903c42..f015f22a083b 100644 --- a/sound/soc/codecs/tas2764.h +++ b/sound/soc/codecs/tas2764.h @@ -47,6 +47,7 @@ #define TAS2764_TDM_CFG0_MASK GENMASK(3, 1) #define TAS2764_TDM_CFG0_44_1_48KHZ BIT(3) #define TAS2764_TDM_CFG0_88_2_96KHZ (BIT(3) | BIT(1)) +#define TAS2764_TDM_CFG0_FRAME_START BIT(0) /* TDM Configuration Reg1 */ #define TAS2764_TDM_CFG1 TAS2764_REG(0X0, 0x09) @@ -66,10 +67,7 @@ #define TAS2764_TDM_CFG2_RXS_16BITS 0x0 #define TAS2764_TDM_CFG2_RXS_24BITS BIT(0) #define TAS2764_TDM_CFG2_RXS_32BITS BIT(1) -#define TAS2764_TDM_CFG2_SCFG_MASK GENMASK(5, 4) -#define TAS2764_TDM_CFG2_SCFG_I2S 0x0 -#define TAS2764_TDM_CFG2_SCFG_LEFT_J BIT(4) -#define TAS2764_TDM_CFG2_SCFG_RIGHT_J BIT(5) +#define TAS2764_TDM_CFG2_SCFG_SHIFT 4 /* TDM Configuration Reg3 */ #define TAS2764_TDM_CFG3 TAS2764_REG(0X0, 0x0c) diff --git a/sound/soc/codecs/tscs454.c b/sound/soc/codecs/tscs454.c index d0af16b4db2f..a6f339bb4771 100644 --- a/sound/soc/codecs/tscs454.c +++ b/sound/soc/codecs/tscs454.c @@ -3115,18 +3115,17 @@ static int set_aif_sample_format(struct snd_soc_component *component, unsigned int width; int ret; - switch (format) { - case SNDRV_PCM_FORMAT_S16_LE: + switch (snd_pcm_format_width(format)) { + case 16: width = FV_WL_16; break; - case SNDRV_PCM_FORMAT_S20_3LE: + case 20: width = FV_WL_20; break; - case SNDRV_PCM_FORMAT_S24_3LE: + case 24: width = FV_WL_24; break; - case SNDRV_PCM_FORMAT_S24_LE: - case SNDRV_PCM_FORMAT_S32_LE: + case 32: width = FV_WL_32; break; default: @@ -3321,6 +3320,7 @@ static const struct snd_soc_component_driver soc_component_dev_tscs454 = { .num_dapm_routes = ARRAY_SIZE(tscs454_intercon), .controls = tscs454_snd_controls, .num_controls = ARRAY_SIZE(tscs454_snd_controls), + .endianness = 1, }; #define TSCS454_RATES SNDRV_PCM_RATE_8000_96000 diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index 72e165cc6443..97ece3114b3d 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -536,7 +536,7 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000, { struct i2c_client *i2c = wm2000->i2c; int i, j; - int ret; + int ret = 0; if (wm2000->anc_mode == mode) return 0; @@ -566,13 +566,13 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000, ret = anc_transitions[i].step[j](i2c, anc_transitions[i].analogue); if (ret != 0) - return ret; + break; } if (anc_transitions[i].dest == ANC_OFF) clk_disable_unprepare(wm2000->mclk); - return 0; + return ret; } static int wm2000_anc_set_mode(struct wm2000_priv *wm2000) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 4238929b2375..d0cef982215d 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -413,6 +413,7 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, unsigned int rnew = (!!ucontrol->value.integer.value[1]) << mc->rshift; unsigned int lold, rold; unsigned int lena, rena; + bool change = false; int ret; snd_soc_dapm_mutex_lock(dapm); @@ -440,8 +441,8 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, goto err; } - ret = regmap_update_bits(arizona->regmap, ARIZONA_DRE_ENABLE, - mask, lnew | rnew); + ret = regmap_update_bits_check(arizona->regmap, ARIZONA_DRE_ENABLE, + mask, lnew | rnew, &change); if (ret) { dev_err(arizona->dev, "Failed to set DRE: %d\n", ret); goto err; @@ -454,6 +455,9 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, if (!rnew && rold) wm5110_clear_pga_volume(arizona, mc->rshift); + if (change) + ret = 1; + err: snd_soc_dapm_mutex_unlock(dapm); diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 0bd3bbc2aacf..38651022e3d5 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3864,6 +3864,7 @@ static int wm8962_runtime_suspend(struct device *dev) #endif static const struct dev_pm_ops wm8962_pm = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) SET_RUNTIME_PM_OPS(wm8962_runtime_suspend, wm8962_runtime_resume, NULL) }; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 51d95437e0fd..10189f44af28 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -800,7 +800,7 @@ int wm_adsp_fw_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; struct wm_adsp *dsp = snd_soc_component_get_drvdata(component); - int ret = 0; + int ret = 1; if (ucontrol->value.enumerated.item[0] == dsp[e->shift_l].fw) return 0; diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h index 4bbcd0dbe8f1..8923c680f0e0 100644 --- a/sound/soc/fsl/fsl_sai.h +++ b/sound/soc/fsl/fsl_sai.h @@ -80,8 +80,8 @@ #define FSL_SAI_xCR3(tx, ofs) (tx ? FSL_SAI_TCR3(ofs) : FSL_SAI_RCR3(ofs)) #define FSL_SAI_xCR4(tx, ofs) (tx ? FSL_SAI_TCR4(ofs) : FSL_SAI_RCR4(ofs)) #define FSL_SAI_xCR5(tx, ofs) (tx ? FSL_SAI_TCR5(ofs) : FSL_SAI_RCR5(ofs)) -#define FSL_SAI_xDR(tx, ofs) (tx ? FSL_SAI_TDR(ofs) : FSL_SAI_RDR(ofs)) -#define FSL_SAI_xFR(tx, ofs) (tx ? FSL_SAI_TFR(ofs) : FSL_SAI_RFR(ofs)) +#define FSL_SAI_xDR0(tx) (tx ? FSL_SAI_TDR0 : FSL_SAI_RDR0) +#define FSL_SAI_xFR0(tx) (tx ? FSL_SAI_TFR0 : FSL_SAI_RFR0) #define FSL_SAI_xMR(tx) (tx ? FSL_SAI_TMR : FSL_SAI_RMR) /* SAI Transmit/Receive Control Register */ diff --git a/sound/soc/fsl/imx-sgtl5000.c b/sound/soc/fsl/imx-sgtl5000.c index f45cb4bbb6c4..5997bb5acb73 100644 --- a/sound/soc/fsl/imx-sgtl5000.c +++ b/sound/soc/fsl/imx-sgtl5000.c @@ -120,19 +120,19 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); if (!data) { ret = -ENOMEM; - goto fail; + goto put_device; } comp = devm_kzalloc(&pdev->dev, 3 * sizeof(*comp), GFP_KERNEL); if (!comp) { ret = -ENOMEM; - goto fail; + goto put_device; } data->codec_clk = clk_get(&codec_dev->dev, NULL); if (IS_ERR(data->codec_clk)) { ret = PTR_ERR(data->codec_clk); - goto fail; + goto put_device; } data->clk_frequency = clk_get_rate(data->codec_clk); @@ -158,10 +158,10 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) data->card.dev = &pdev->dev; ret = snd_soc_of_parse_card_name(&data->card, "model"); if (ret) - goto fail; + goto put_device; ret = snd_soc_of_parse_audio_routing(&data->card, "audio-routing"); if (ret) - goto fail; + goto put_device; data->card.num_links = 1; data->card.owner = THIS_MODULE; data->card.dai_link = &data->dai; @@ -176,7 +176,7 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) if (ret != -EPROBE_DEFER) dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret); - goto fail; + goto put_device; } of_node_put(ssi_np); @@ -184,6 +184,8 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) return 0; +put_device: + put_device(&codec_dev->dev); fail: if (data && !IS_ERR(data->codec_clk)) clk_put(data->codec_clk); diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 43ee3d095a1b..3020a993f6ef 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -615,6 +615,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_OVCD_SF_0P75 | BYT_RT5640_MCLK_EN), }, + { /* HP Pro Tablet 408 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pro Tablet 408"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_1500UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* HP Stream 7 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 87c891c46291..3b3868df9f67 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -201,7 +201,6 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, struct nhlt_fmt_cfg *fmt_cfg; struct wav_fmt_ext *wav_fmt; unsigned long rate; - bool present = false; int rate_index = 0; u16 channels, bps; u8 clk_src; @@ -214,9 +213,12 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, if (fmt->fmt_count == 0) return; + fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; for (i = 0; i < fmt->fmt_count; i++) { - fmt_cfg = &fmt->fmt_config[i]; - wav_fmt = &fmt_cfg->fmt_ext; + struct nhlt_fmt_cfg *saved_fmt_cfg = fmt_cfg; + bool present = false; + + wav_fmt = &saved_fmt_cfg->fmt_ext; channels = wav_fmt->fmt.channels; bps = wav_fmt->fmt.bits_per_sample; @@ -234,12 +236,18 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, * derive the rate. */ for (j = i; j < fmt->fmt_count; j++) { - fmt_cfg = &fmt->fmt_config[j]; - wav_fmt = &fmt_cfg->fmt_ext; + struct nhlt_fmt_cfg *tmp_fmt_cfg = fmt_cfg; + + wav_fmt = &tmp_fmt_cfg->fmt_ext; if ((fs == wav_fmt->fmt.samples_per_sec) && - (bps == wav_fmt->fmt.bits_per_sample)) + (bps == wav_fmt->fmt.bits_per_sample)) { channels = max_t(u16, channels, wav_fmt->fmt.channels); + saved_fmt_cfg = tmp_fmt_cfg; + } + /* Move to the next nhlt_fmt_cfg */ + tmp_fmt_cfg = (struct nhlt_fmt_cfg *)(tmp_fmt_cfg->config.caps + + tmp_fmt_cfg->config.size); } rate = channels * bps * fs; @@ -255,8 +263,11 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, /* Fill rate and parent for sclk/sclkfs */ if (!present) { + struct nhlt_fmt_cfg *first_fmt_cfg; + + first_fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; i2s_config_ext = (struct skl_i2s_config_blob_ext *) - fmt->fmt_config[0].config.caps; + first_fmt_cfg->config.caps; /* MCLK Divider Source Select */ if (is_legacy_blob(i2s_config_ext->hdr.sig)) { @@ -270,6 +281,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, parent = skl_get_parent_clk(clk_src); + /* Move to the next nhlt_fmt_cfg */ + fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps + + fmt_cfg->config.size); /* * Do not copy the config data if there is no parent * clock available for this clock source select @@ -278,9 +292,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, continue; sclk[id].rate_cfg[rate_index].rate = rate; - sclk[id].rate_cfg[rate_index].config = fmt_cfg; + sclk[id].rate_cfg[rate_index].config = saved_fmt_cfg; sclkfs[id].rate_cfg[rate_index].rate = rate; - sclkfs[id].rate_cfg[rate_index].config = fmt_cfg; + sclkfs[id].rate_cfg[rate_index].config = saved_fmt_cfg; sclk[id].parent_name = parent->name; sclkfs[id].parent_name = parent->name; @@ -294,13 +308,13 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk, { struct skl_i2s_config_blob_ext *i2s_config_ext; struct skl_i2s_config_blob_legacy *i2s_config; - struct nhlt_specific_cfg *fmt_cfg; + struct nhlt_fmt_cfg *fmt_cfg; struct skl_clk_parent_src *parent; u32 clkdiv, div_ratio; u8 clk_src; - fmt_cfg = &fmt->fmt_config[0].config; - i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->caps; + fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; + i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->config.caps; /* MCLK Divider Source Select and divider */ if (is_legacy_blob(i2s_config_ext->hdr.sig)) { @@ -329,7 +343,7 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk, return; mclk[id].rate_cfg[0].rate = parent->rate/div_ratio; - mclk[id].rate_cfg[0].config = &fmt->fmt_config[0]; + mclk[id].rate_cfg[0].config = fmt_cfg; mclk[id].parent_name = parent->name; } diff --git a/sound/soc/mediatek/mt2701/mt2701-wm8960.c b/sound/soc/mediatek/mt2701/mt2701-wm8960.c index 414e422c0eba..70e494fb3da8 100644 --- a/sound/soc/mediatek/mt2701/mt2701-wm8960.c +++ b/sound/soc/mediatek/mt2701/mt2701-wm8960.c @@ -129,7 +129,8 @@ static int mt2701_wm8960_machine_probe(struct platform_device *pdev) if (!codec_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_platform_node; } for_each_card_prelinks(card, i, dai_link) { if (dai_link->codecs->name) @@ -140,7 +141,7 @@ static int mt2701_wm8960_machine_probe(struct platform_device *pdev) ret = snd_soc_of_parse_audio_routing(card, "audio-routing"); if (ret) { dev_err(&pdev->dev, "failed to parse audio-routing: %d\n", ret); - return ret; + goto put_codec_node; } ret = devm_snd_soc_register_card(&pdev->dev, card); @@ -148,6 +149,10 @@ static int mt2701_wm8960_machine_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); +put_codec_node: + of_node_put(codec_node); +put_platform_node: + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c index 3bdd4931316c..5f39e810e27a 100644 --- a/sound/soc/mediatek/mt8173/mt8173-max98090.c +++ b/sound/soc/mediatek/mt8173/mt8173-max98090.c @@ -167,7 +167,8 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev) if (!codec_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_platform_node; } for_each_card_prelinks(card, i, dai_link) { if (dai_link->codecs->name) @@ -182,6 +183,8 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev) __func__, ret); of_node_put(codec_node); + +put_platform_node: of_node_put(platform_node); return ret; } diff --git a/sound/soc/mxs/mxs-saif.c b/sound/soc/mxs/mxs-saif.c index f2eda81985e2..d87ac26999cf 100644 --- a/sound/soc/mxs/mxs-saif.c +++ b/sound/soc/mxs/mxs-saif.c @@ -764,6 +764,7 @@ static int mxs_saif_probe(struct platform_device *pdev) saif->master_id = saif->id; } else { ret = of_alias_get_id(master, "saif"); + of_node_put(master); if (ret < 0) return ret; else diff --git a/sound/soc/samsung/aries_wm8994.c b/sound/soc/samsung/aries_wm8994.c index 0ac5956ba270..18458192aff1 100644 --- a/sound/soc/samsung/aries_wm8994.c +++ b/sound/soc/samsung/aries_wm8994.c @@ -585,19 +585,16 @@ static int aries_audio_probe(struct platform_device *pdev) extcon_np = of_parse_phandle(np, "extcon", 0); priv->usb_extcon = extcon_find_edev_by_node(extcon_np); - if (IS_ERR(priv->usb_extcon)) { - if (PTR_ERR(priv->usb_extcon) != -EPROBE_DEFER) - dev_err(dev, "Failed to get extcon device"); - return PTR_ERR(priv->usb_extcon); - } of_node_put(extcon_np); + if (IS_ERR(priv->usb_extcon)) + return dev_err_probe(dev, PTR_ERR(priv->usb_extcon), + "Failed to get extcon device"); priv->adc = devm_iio_channel_get(dev, "headset-detect"); - if (IS_ERR(priv->adc)) { - if (PTR_ERR(priv->adc) != -EPROBE_DEFER) - dev_err(dev, "Failed to get ADC channel"); - return PTR_ERR(priv->adc); - } + if (IS_ERR(priv->adc)) + return dev_err_probe(dev, PTR_ERR(priv->adc), + "Failed to get ADC channel"); + if (priv->adc->channel->type != IIO_VOLTAGE) return -EINVAL; diff --git a/sound/soc/samsung/arndale.c b/sound/soc/samsung/arndale.c index 28587375813a..35e34e534b8b 100644 --- a/sound/soc/samsung/arndale.c +++ b/sound/soc/samsung/arndale.c @@ -174,9 +174,8 @@ static int arndale_audio_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(card->dev, card); if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "snd_soc_register_card() failed: %d\n", ret); + dev_err_probe(&pdev->dev, ret, + "snd_soc_register_card() failed\n"); goto err_put_of_nodes; } return 0; diff --git a/sound/soc/samsung/littlemill.c b/sound/soc/samsung/littlemill.c index a1ff1400857e..e73356a66087 100644 --- a/sound/soc/samsung/littlemill.c +++ b/sound/soc/samsung/littlemill.c @@ -325,9 +325,8 @@ static int littlemill_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret && ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", - ret); + if (ret) + dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); return ret; } diff --git a/sound/soc/samsung/lowland.c b/sound/soc/samsung/lowland.c index 998d10cf8c94..7b12ccd2a9b2 100644 --- a/sound/soc/samsung/lowland.c +++ b/sound/soc/samsung/lowland.c @@ -183,9 +183,8 @@ static int lowland_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret && ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", - ret); + if (ret) + dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); return ret; } diff --git a/sound/soc/samsung/odroid.c b/sound/soc/samsung/odroid.c index ca643a488c3c..4ff12e2e704f 100644 --- a/sound/soc/samsung/odroid.c +++ b/sound/soc/samsung/odroid.c @@ -311,9 +311,7 @@ static int odroid_audio_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(dev, card); if (ret < 0) { - if (ret != -EPROBE_DEFER) - dev_err(dev, "snd_soc_register_card() failed: %d\n", - ret); + dev_err_probe(dev, ret, "snd_soc_register_card() failed\n"); goto err_put_clk_i2s; } diff --git a/sound/soc/samsung/smdk_wm8994.c b/sound/soc/samsung/smdk_wm8994.c index 64a1a64656ab..92cd9e8a2e61 100644 --- a/sound/soc/samsung/smdk_wm8994.c +++ b/sound/soc/samsung/smdk_wm8994.c @@ -178,8 +178,8 @@ static int smdk_audio_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret && ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "snd_soc_register_card() failed:%d\n", ret); + if (ret) + dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); return ret; } diff --git a/sound/soc/samsung/smdk_wm8994pcm.c b/sound/soc/samsung/smdk_wm8994pcm.c index a01640576f71..110a51a4f870 100644 --- a/sound/soc/samsung/smdk_wm8994pcm.c +++ b/sound/soc/samsung/smdk_wm8994pcm.c @@ -118,8 +118,8 @@ static int snd_smdk_probe(struct platform_device *pdev) smdk_pcm.dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, &smdk_pcm); - if (ret && ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "snd_soc_register_card failed %d\n", ret); + if (ret) + dev_err_probe(&pdev->dev, ret, "snd_soc_register_card failed\n"); return ret; } diff --git a/sound/soc/samsung/snow.c b/sound/soc/samsung/snow.c index 07163f07c6d5..6aa2c66d8e8c 100644 --- a/sound/soc/samsung/snow.c +++ b/sound/soc/samsung/snow.c @@ -215,12 +215,9 @@ static int snow_probe(struct platform_device *pdev) snd_soc_card_set_drvdata(card, priv); ret = devm_snd_soc_register_card(dev, card); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "snd_soc_register_card failed (%d)\n", ret); - return ret; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, + "snd_soc_register_card failed\n"); return ret; } diff --git a/sound/soc/samsung/speyside.c b/sound/soc/samsung/speyside.c index f5f6ba00d073..37b1f4f60b21 100644 --- a/sound/soc/samsung/speyside.c +++ b/sound/soc/samsung/speyside.c @@ -330,9 +330,8 @@ static int speyside_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret && ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", - ret); + if (ret) + dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); return ret; } diff --git a/sound/soc/samsung/tm2_wm5110.c b/sound/soc/samsung/tm2_wm5110.c index 125e07f65d2b..ca1be7a7cb8a 100644 --- a/sound/soc/samsung/tm2_wm5110.c +++ b/sound/soc/samsung/tm2_wm5110.c @@ -611,8 +611,7 @@ static int tm2_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(dev, card); if (ret < 0) { - if (ret != -EPROBE_DEFER) - dev_err(dev, "Failed to register card: %d\n", ret); + dev_err_probe(dev, ret, "Failed to register card\n"); goto dai_node_put; } diff --git a/sound/soc/samsung/tobermory.c b/sound/soc/samsung/tobermory.c index c962d2c2a7f7..95c6267b0c0c 100644 --- a/sound/soc/samsung/tobermory.c +++ b/sound/soc/samsung/tobermory.c @@ -229,9 +229,8 @@ static int tobermory_probe(struct platform_device *pdev) card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); - if (ret && ret != -EPROBE_DEFER) - dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", - ret); + if (ret) + dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); return ret; } diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 417732bdf286..754c1f16ee83 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -62,6 +62,8 @@ struct snd_soc_dapm_widget * snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, const struct snd_soc_dapm_widget *widget); +static unsigned int soc_dapm_read(struct snd_soc_dapm_context *dapm, int reg); + /* dapm power sequences - make this per codec in the future */ static int dapm_up_seq[] = { [snd_soc_dapm_pre] = 1, @@ -442,6 +444,9 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, snd_soc_dapm_add_path(widget->dapm, data->widget, widget, NULL, NULL); + } else if (e->reg != SND_SOC_NOPM) { + data->value = soc_dapm_read(widget->dapm, e->reg) & + (e->mask << e->shift_l); } break; default: @@ -3427,7 +3432,6 @@ int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol, update.val = val; card->update = &update; } - change |= reg_change; ret = soc_dapm_mixer_update_power(card, kcontrol, connect, rconnect); @@ -3529,7 +3533,6 @@ int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, update.val = val; card->update = &update; } - change |= reg_change; ret = soc_dapm_mux_update_power(card, kcontrol, item[0], e); diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 15bfcdbdfaa4..0f26d6c31ce5 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -517,7 +517,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, return -EINVAL; if (mc->platform_max && tmp > mc->platform_max) return -EINVAL; - if (tmp > mc->max - mc->min + 1) + if (tmp > mc->max - mc->min) return -EINVAL; if (invert) @@ -538,7 +538,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, return -EINVAL; if (mc->platform_max && tmp > mc->platform_max) return -EINVAL; - if (tmp > mc->max - mc->min + 1) + if (tmp > mc->max - mc->min) return -EINVAL; if (invert) diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 347636a80b48..4012097a9d60 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -79,9 +79,9 @@ static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsig } /* - * first boot sequence has some extra steps. core 0 waits for power - * status on core 1, so power up core 1 also momentarily, keep it in - * reset/stall and then turn it off + * first boot sequence has some extra steps. + * power on all host managed cores and only unstall/run the boot core to boot the + * DSP then turn off all non boot cores (if any) is powered on. */ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) { @@ -115,7 +115,7 @@ static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag) ((stream_tag - 1) << 9))); /* step 3: unset core 0 reset state & unstall/run core 0 */ - ret = hda_dsp_core_run(sdev, BIT(0)); + ret = hda_dsp_core_run(sdev, chip->init_core_mask); if (ret < 0) { if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS) dev_err(sdev->dev, diff --git a/sound/soc/ti/j721e-evm.c b/sound/soc/ti/j721e-evm.c index 265bbc5a2f96..756cd9694cbe 100644 --- a/sound/soc/ti/j721e-evm.c +++ b/sound/soc/ti/j721e-evm.c @@ -631,17 +631,18 @@ static int j721e_soc_probe_cpb(struct j721e_priv *priv, int *link_idx, codec_node = of_parse_phandle(node, "ti,cpb-codec", 0); if (!codec_node) { dev_err(priv->dev, "CPB codec node is not provided\n"); - return -EINVAL; + ret = -EINVAL; + goto put_dai_node; } domain = &priv->audio_domains[J721E_AUDIO_DOMAIN_CPB]; ret = j721e_get_clocks(priv->dev, &domain->codec, "cpb-codec-scki"); if (ret) - return ret; + goto put_codec_node; ret = j721e_get_clocks(priv->dev, &domain->mcasp, "cpb-mcasp-auxclk"); if (ret) - return ret; + goto put_codec_node; /* * Common Processor Board, two links @@ -651,8 +652,10 @@ static int j721e_soc_probe_cpb(struct j721e_priv *priv, int *link_idx, comp_count = 6; compnent = devm_kzalloc(priv->dev, comp_count * sizeof(*compnent), GFP_KERNEL); - if (!compnent) - return -ENOMEM; + if (!compnent) { + ret = -ENOMEM; + goto put_codec_node; + } comp_idx = 0; priv->dai_links[*link_idx].cpus = &compnent[comp_idx++]; @@ -703,6 +706,12 @@ static int j721e_soc_probe_cpb(struct j721e_priv *priv, int *link_idx, (*conf_idx)++; return 0; + +put_codec_node: + of_node_put(codec_node); +put_dai_node: + of_node_put(dai_node); + return ret; } static int j721e_soc_probe_ivi(struct j721e_priv *priv, int *link_idx, @@ -727,23 +736,25 @@ static int j721e_soc_probe_ivi(struct j721e_priv *priv, int *link_idx, codeca_node = of_parse_phandle(node, "ti,ivi-codec-a", 0); if (!codeca_node) { dev_err(priv->dev, "IVI codec-a node is not provided\n"); - return -EINVAL; + ret = -EINVAL; + goto put_dai_node; } codecb_node = of_parse_phandle(node, "ti,ivi-codec-b", 0); if (!codecb_node) { dev_warn(priv->dev, "IVI codec-b node is not provided\n"); - return 0; + ret = 0; + goto put_codeca_node; } domain = &priv->audio_domains[J721E_AUDIO_DOMAIN_IVI]; ret = j721e_get_clocks(priv->dev, &domain->codec, "ivi-codec-scki"); if (ret) - return ret; + goto put_codecb_node; ret = j721e_get_clocks(priv->dev, &domain->mcasp, "ivi-mcasp-auxclk"); if (ret) - return ret; + goto put_codecb_node; /* * IVI extension, two links @@ -755,8 +766,10 @@ static int j721e_soc_probe_ivi(struct j721e_priv *priv, int *link_idx, comp_count = 8; compnent = devm_kzalloc(priv->dev, comp_count * sizeof(*compnent), GFP_KERNEL); - if (!compnent) - return -ENOMEM; + if (!compnent) { + ret = -ENOMEM; + goto put_codecb_node; + } comp_idx = 0; priv->dai_links[*link_idx].cpus = &compnent[comp_idx++]; @@ -817,6 +830,15 @@ static int j721e_soc_probe_ivi(struct j721e_priv *priv, int *link_idx, (*conf_idx)++; return 0; + + +put_codecb_node: + of_node_put(codecb_node); +put_codeca_node: + of_node_put(codeca_node); +put_dai_node: + of_node_put(dai_node); + return ret; } static int j721e_soc_probe(struct platform_device *pdev) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 84676a8fb60d..93fee6e365a6 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1161,6 +1161,9 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream) { + struct usbmidi_out_port *port = substream->runtime->private_data; + + cancel_work_sync(&port->ep->work); return substream_open(substream, 0, 0); } diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index aabd3a10ec5b..1ac91c46da3c 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3208,6 +3208,15 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } }, +/* Rane SL-1 */ +{ + USB_DEVICE(0x13e5, 0x0001), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + } +}, + /* disabled due to regression for other devices; * see https://bugzilla.kernel.org/show_bug.cgi?id=199905 */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index b58730cc12e8..ec53f52a06a5 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCEs for Spectre variant 2 */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -290,6 +290,13 @@ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -308,6 +315,7 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -417,5 +425,7 @@ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 5861d34f9771..d109c5ec967c 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,25 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) +#endif + #ifdef CONFIG_IOMMU_SUPPORT # define DISABLE_ENQCMD 0 #else @@ -76,7 +95,7 @@ #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_SMAP) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h index 877827b7c2c3..a61051400311 100644 --- a/tools/arch/x86/include/asm/inat.h +++ b/tools/arch/x86/include/asm/inat.h @@ -6,7 +6,7 @@ * * Written by Masami Hiramatsu */ -#include "inat_types.h" +#include "inat_types.h" /* __ignore_sync_check__ */ /* * Internal bits. Don't use bitmasks directly, because these bits are diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index 52c6262e6bfd..636ec02793a7 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -8,7 +8,7 @@ */ /* insn_attr_t is defined in inat.h */ -#include "inat.h" +#include "inat.h" /* __ignore_sync_check__ */ struct insn_field { union { @@ -87,13 +87,25 @@ struct insn { #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); -extern void insn_get_prefixes(struct insn *insn); -extern void insn_get_opcode(struct insn *insn); -extern void insn_get_modrm(struct insn *insn); -extern void insn_get_sib(struct insn *insn); -extern void insn_get_displacement(struct insn *insn); -extern void insn_get_immediate(struct insn *insn); -extern void insn_get_length(struct insn *insn); +extern int insn_get_prefixes(struct insn *insn); +extern int insn_get_opcode(struct insn *insn); +extern int insn_get_modrm(struct insn *insn); +extern int insn_get_sib(struct insn *insn); +extern int insn_get_displacement(struct insn *insn); +extern int insn_get_immediate(struct insn *insn); +extern int insn_get_length(struct insn *insn); + +enum insn_mode { + INSN_MODE_32, + INSN_MODE_64, + /* Mode is determined by the current kernel build. */ + INSN_MODE_KERN, + INSN_NUM_MODES, +}; + +extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); + +#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) /* Attribute will be determined after getting ModRM (for opcode groups) */ static inline void insn_get_attribute(struct insn *insn) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 972a34d93505..144dc164b759 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -91,6 +93,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass @@ -114,6 +117,41 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ +#define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -131,6 +169,7 @@ /* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 @@ -482,6 +521,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/tools/arch/x86/lib/inat.c b/tools/arch/x86/lib/inat.c index 4f5ed49e1b4e..dfbcc6405941 100644 --- a/tools/arch/x86/lib/inat.c +++ b/tools/arch/x86/lib/inat.c @@ -4,7 +4,7 @@ * * Written by Masami Hiramatsu */ -#include "../include/asm/insn.h" +#include "../include/asm/insn.h" /* __ignore_sync_check__ */ /* Attribute tables are generated from opcode map */ #include "inat-tables.c" diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 0151dfc6da61..f24cc0f618e4 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -10,10 +10,13 @@ #else #include #endif -#include "../include/asm/inat.h" -#include "../include/asm/insn.h" +#include "../include/asm/inat.h" /* __ignore_sync_check__ */ +#include "../include/asm/insn.h" /* __ignore_sync_check__ */ -#include "../include/asm/emulate_prefix.h" +#include +#include + +#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */ /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ @@ -97,8 +100,12 @@ static void insn_get_emulate_prefix(struct insn *insn) * Populates the @insn->prefixes bitmap, and updates @insn->next_byte * to point to the (first) opcode. No effect if @insn->prefixes.got * is already set. + * + * * Returns: + * 0: on success + * < 0: on error */ -void insn_get_prefixes(struct insn *insn) +int insn_get_prefixes(struct insn *insn) { struct insn_field *prefixes = &insn->prefixes; insn_attr_t attr; @@ -106,7 +113,7 @@ void insn_get_prefixes(struct insn *insn) int i, nb; if (prefixes->got) - return; + return 0; insn_get_emulate_prefix(insn); @@ -217,8 +224,10 @@ void insn_get_prefixes(struct insn *insn) prefixes->got = 1; + return 0; + err_out: - return; + return -ENODATA; } /** @@ -230,16 +239,25 @@ void insn_get_prefixes(struct insn *insn) * If necessary, first collects any preceding (prefix) bytes. * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got * is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_opcode(struct insn *insn) +int insn_get_opcode(struct insn *insn) { struct insn_field *opcode = &insn->opcode; + int pfx_id, ret; insn_byte_t op; - int pfx_id; + if (opcode->got) - return; - if (!insn->prefixes.got) - insn_get_prefixes(insn); + return 0; + + if (!insn->prefixes.got) { + ret = insn_get_prefixes(insn); + if (ret) + return ret; + } /* Get first opcode */ op = get_next(insn_byte_t, insn); @@ -254,9 +272,13 @@ void insn_get_opcode(struct insn *insn) insn->attr = inat_get_avx_attribute(op, m, p); if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || (!inat_accept_vex(insn->attr) && - !inat_is_group(insn->attr))) - insn->attr = 0; /* This instruction is bad */ - goto end; /* VEX has only 1 byte for opcode */ + !inat_is_group(insn->attr))) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } + /* VEX has only 1 byte for opcode */ + goto end; } insn->attr = inat_get_opcode_attribute(op); @@ -267,13 +289,18 @@ void insn_get_opcode(struct insn *insn) pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); } - if (inat_must_vex(insn->attr)) - insn->attr = 0; /* This instruction is bad */ + + if (inat_must_vex(insn->attr)) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } end: opcode->got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -283,15 +310,25 @@ void insn_get_opcode(struct insn *insn) * Populates @insn->modrm and updates @insn->next_byte to point past the * ModRM byte, if any. If necessary, first collects the preceding bytes * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_modrm(struct insn *insn) +int insn_get_modrm(struct insn *insn) { struct insn_field *modrm = &insn->modrm; insn_byte_t pfx_id, mod; + int ret; + if (modrm->got) - return; - if (!insn->opcode.got) - insn_get_opcode(insn); + return 0; + + if (!insn->opcode.got) { + ret = insn_get_opcode(insn); + if (ret) + return ret; + } if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); @@ -301,17 +338,22 @@ void insn_get_modrm(struct insn *insn) pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, insn->attr); - if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) - insn->attr = 0; /* This is bad */ + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { + /* Bad insn */ + insn->attr = 0; + return -EINVAL; + } } } if (insn->x86_64 && inat_is_force64(insn->attr)) insn->opnd_bytes = 8; + modrm->got = 1; + return 0; err_out: - return; + return -ENODATA; } @@ -325,11 +367,16 @@ void insn_get_modrm(struct insn *insn) int insn_rip_relative(struct insn *insn) { struct insn_field *modrm = &insn->modrm; + int ret; if (!insn->x86_64) return 0; - if (!modrm->got) - insn_get_modrm(insn); + + if (!modrm->got) { + ret = insn_get_modrm(insn); + if (ret) + return 0; + } /* * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. @@ -343,15 +390,25 @@ int insn_rip_relative(struct insn *insn) * * If necessary, first collects the instruction up to and including the * ModRM byte. + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_sib(struct insn *insn) +int insn_get_sib(struct insn *insn) { insn_byte_t modrm; + int ret; if (insn->sib.got) - return; - if (!insn->modrm.got) - insn_get_modrm(insn); + return 0; + + if (!insn->modrm.got) { + ret = insn_get_modrm(insn); + if (ret) + return ret; + } + if (insn->modrm.nbytes) { modrm = (insn_byte_t)insn->modrm.value; if (insn->addr_bytes != 2 && @@ -362,8 +419,10 @@ void insn_get_sib(struct insn *insn) } insn->sib.got = 1; + return 0; + err_out: - return; + return -ENODATA; } @@ -374,15 +433,25 @@ void insn_get_sib(struct insn *insn) * If necessary, first collects the instruction up to and including the * SIB byte. * Displacement value is sign-expanded. + * + * * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_displacement(struct insn *insn) +int insn_get_displacement(struct insn *insn) { insn_byte_t mod, rm, base; + int ret; if (insn->displacement.got) - return; - if (!insn->sib.got) - insn_get_sib(insn); + return 0; + + if (!insn->sib.got) { + ret = insn_get_sib(insn); + if (ret) + return ret; + } + if (insn->modrm.nbytes) { /* * Interpreting the modrm byte: @@ -425,9 +494,10 @@ void insn_get_displacement(struct insn *insn) } out: insn->displacement.got = 1; + return 0; err_out: - return; + return -ENODATA; } /* Decode moffset16/32/64. Return 0 if failed */ @@ -538,20 +608,30 @@ static int __get_immptr(struct insn *insn) } /** - * insn_get_immediate() - Get the immediates of instruction + * insn_get_immediate() - Get the immediate in an instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * displacement bytes. * Basically, most of immediates are sign-expanded. Unsigned-value can be - * get by bit masking with ((1 << (nbytes * 8)) - 1) + * computed by bit masking with ((1 << (nbytes * 8)) - 1) + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_immediate(struct insn *insn) +int insn_get_immediate(struct insn *insn) { + int ret; + if (insn->immediate.got) - return; - if (!insn->displacement.got) - insn_get_displacement(insn); + return 0; + + if (!insn->displacement.got) { + ret = insn_get_displacement(insn); + if (ret) + return ret; + } if (inat_has_moffset(insn->attr)) { if (!__get_moffset(insn)) @@ -604,9 +684,10 @@ void insn_get_immediate(struct insn *insn) } done: insn->immediate.got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -615,13 +696,58 @@ void insn_get_immediate(struct insn *insn) * * If necessary, first collects the instruction up to and including the * immediates bytes. - */ -void insn_get_length(struct insn *insn) + * + * Returns: + * - 0 on success + * - < 0 on error +*/ +int insn_get_length(struct insn *insn) { + int ret; + if (insn->length) - return; - if (!insn->immediate.got) - insn_get_immediate(insn); + return 0; + + if (!insn->immediate.got) { + ret = insn_get_immediate(insn); + if (ret) + return ret; + } + insn->length = (unsigned char)((unsigned long)insn->next_byte - (unsigned long)insn->kaddr); + + return 0; +} + +/** + * insn_decode() - Decode an x86 instruction + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @buf_len: length of the insn buffer at @kaddr + * @m: insn mode, see enum insn_mode + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. + */ +int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) +{ + int ret; + +#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */ + + if (m == INSN_MODE_KERN) + insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); + else + insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); + + ret = insn_get_length(insn); + if (ret) + return ret; + + if (insn_complete(insn)) + return 0; + + return -EINVAL; } diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 1e299ac73c86..59cf2343f3d9 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include .pushsection .noinstr.text, "ax" @@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy) rep movsq movl %edx, %ecx rep movsb - ret + RET SYM_FUNC_END(memcpy) SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(memcpy) @@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb - ret + RET SYM_FUNC_END(memcpy_erms) SYM_FUNC_START_LOCAL(memcpy_orig) @@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq %r9, 1*8(%rdi) movq %r10, -2*8(%rdi, %rdx) movq %r11, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_16bytes: cmpl $8, %edx @@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq -1*8(%rsi, %rdx), %r9 movq %r8, 0*8(%rdi) movq %r9, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_8bytes: cmpl $4, %edx @@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movl -4(%rsi, %rdx), %r8d movl %ecx, (%rdi) movl %r8d, -4(%rdi, %rdx) - retq + RET .p2align 4 .Lless_3bytes: subl $1, %edx @@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movb %cl, (%rdi) .Lend: - retq + RET SYM_FUNC_END(memcpy_orig) .popsection diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0bfd26e4ca9e..d624f2bc42f1 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,7 +3,7 @@ #include #include -#include +#include #include /* @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) movl %edx,%ecx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(__memset) SYM_FUNC_END_ALIAS(memset) EXPORT_SYMBOL(memset) @@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms) movq %rdx,%rcx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(memset_erms) SYM_FUNC_START_LOCAL(memset_orig) @@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig) .Lende: movq %r10,%rax - ret + RET .Lbad_alignment: cmpq $7,%rdx diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative.h similarity index 100% rename from tools/include/asm/alternative-asm.h rename to tools/include/asm/alternative.h diff --git a/tools/include/linux/kconfig.h b/tools/include/linux/kconfig.h new file mode 100644 index 000000000000..13b86bd3b746 --- /dev/null +++ b/tools/include/linux/kconfig.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_KCONFIG_H +#define _TOOLS_LINUX_KCONFIG_H + +/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */ + +#define __ARG_PLACEHOLDER_1 0, +#define __take_second_arg(__ignored, val, ...) val + +/* + * The use of "&&" / "||" is limited in certain expressions. + * The following enable to calculate "and" / "or" with macro expansion only. + */ +#define __and(x, y) ___and(x, y) +#define ___and(x, y) ____and(__ARG_PLACEHOLDER_##x, y) +#define ____and(arg1_or_junk, y) __take_second_arg(arg1_or_junk y, 0) + +#define __or(x, y) ___or(x, y) +#define ___or(x, y) ____or(__ARG_PLACEHOLDER_##x, y) +#define ____or(arg1_or_junk, y) __take_second_arg(arg1_or_junk 1, y) + +/* + * Helper macros to use CONFIG_ options in C/CPP expressions. Note that + * these only work with boolean and tristate options. + */ + +/* + * Getting something that works in C and CPP for an arg that may or may + * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1" + * we match on the placeholder define, insert the "0," for arg1 and generate + * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one). + * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when + * the last step cherry picks the 2nd arg, we get a zero. + */ +#define __is_defined(x) ___is_defined(x) +#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val) +#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0) + +/* + * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0 + * otherwise. For boolean options, this is equivalent to + * IS_ENABLED(CONFIG_FOO). + */ +#define IS_BUILTIN(option) __is_defined(option) + +/* + * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0 + * otherwise. + */ +#define IS_MODULE(option) __is_defined(option##_MODULE) + +/* + * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled + * code can call a function defined in code compiled based on CONFIG_FOO. + * This is similar to IS_ENABLED(), but returns false when invoked from + * built-in code when CONFIG_FOO is set to 'm'. + */ +#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \ + __and(IS_MODULE(option), __is_defined(MODULE))) + +/* + * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm', + * 0 otherwise. + */ +#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option)) + +#endif /* _TOOLS_LINUX_KCONFIG_H */ diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 577f51436cf9..662f19374bd9 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -29,11 +29,19 @@ struct unwind_hint { * * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that * sp_reg+sp_offset points to the iret return frame. + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 -#define UNWIND_HINT_TYPE_RET_OFFSET 3 +#define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_STACK_VALIDATION @@ -96,7 +104,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -120,7 +128,7 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm #endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e440cd7f32a6..7943e748916d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4180,7 +4180,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; @@ -5006,7 +5007,10 @@ struct bpf_pidns_info { /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ struct bpf_sk_lookup { - __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + union { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ + }; __u32 family; /* Protocol family (AF_INET, AF_INET6) */ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index b0bf56c5f120..a1efcfbd8b9e 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately. .format(values)) if len(pids) > 1: sys.exit('Error: Multiple processes found (pids: {}). Use "-p"' - ' to specify the desired pid'.format(" ".join(pids))) + ' to specify the desired pid' + .format(" ".join(map(str, pids)))) namespace.pid = pids[0] argparser = argparse.ArgumentParser(description=description_text, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 61df26f048d9..8fada26529b7 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5928,9 +5928,10 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) */ prog = NULL; for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (strcmp(prog->sec_name, sec_name) == 0) + if (strcmp(obj->programs[i].sec_name, sec_name) == 0) { + prog = &obj->programs[i]; break; + } } if (!prog) { pr_warn("sec '%s': failed to find a BPF program\n", sec_name); @@ -5945,10 +5946,17 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) insn_idx = rec->insn_off / BPF_INSN_SZ; prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { - pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n", - sec_name, insn_idx, i); - err = -EINVAL; - goto out; + /* When __weak subprog is "overridden" by another instance + * of the subprog from a different object file, linker still + * appends all the .BTF.ext info that used to belong to that + * eliminated subprogram. + * This is similar to what x86-64 linker does for relocations. + * So just ignore such relocations just like we ignore + * subprog instructions when discovering subprograms. + */ + pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", + sec_name, i, insn_idx); + continue; } /* no need to apply CO-RE relocation if the program is * not going to be loaded diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 0542e46c7552..30f38fdc0d56 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -315,13 +315,15 @@ they mean, and suggestions for how to fix them. function tracing inserts additional calls, which is not obvious from the sources). -10. file.o: warning: func()+0x5c: alternative modifies stack +10. file.o: warning: func()+0x5c: stack layout conflict in alternatives - This means that an alternative includes instructions that modify the - stack. The problem is that there is only one ORC unwind table, this means - that the ORC unwind entries must be valid for each of the alternatives. - The easiest way to enforce this is to ensure alternatives do not contain - any ORC entries, which in turn implies the above constraint. + This means that in the use of the alternative() or ALTERNATIVE() + macro, the code paths have conflicting modifications to the stack. + The problem is that there is only one ORC unwind table, which means + that the ORC unwind entries must be consistent for all possible + instruction boundaries regardless of which code has been patched. + This limitation can be overcome by massaging the alternatives with + NOPs to shift the stack changes around so they no longer conflict. 11. file.o: warning: unannotated intra-function call diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 5cdb19036d7f..a43096f713c7 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -46,10 +46,6 @@ ifeq ($(SRCARCH),x86) SUBCMD_ORC := y endif -ifeq ($(SUBCMD_ORC),y) - CFLAGS += -DINSN_USE_ORC -endif - export SUBCMD_CHECK SUBCMD_ORC export srctree OUTPUT CFLAGS SRCARCH AWK include $(srctree)/tools/build/Makefile.include diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index 4a84c3081b8e..580ce1857585 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -11,10 +11,6 @@ #include "objtool.h" #include "cfi.h" -#ifdef INSN_USE_ORC -#include -#endif - enum insn_type { INSN_JUMP_CONDITIONAL, INSN_JUMP_UNCONDITIONAL, @@ -30,6 +26,7 @@ enum insn_type { INSN_CLAC, INSN_STD, INSN_CLD, + INSN_TRAP, INSN_OTHER, }; @@ -87,7 +84,13 @@ unsigned long arch_jump_destination(struct instruction *insn); unsigned long arch_dest_reloc_offset(int addend); const char *arch_nop_insn(int len); +const char *arch_ret_insn(int len); -int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); +int arch_decode_hint_reg(u8 sp_reg, int *base); + +bool arch_is_retpoline(struct symbol *sym); +bool arch_is_rethunk(struct symbol *sym); + +int arch_rewrite_retpolines(struct objtool_file *file); #endif /* _ARCH_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index cde9c36e40ae..d8f47704fd85 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -16,6 +16,7 @@ #include "../../arch.h" #include "../../warn.h" #include +#include "arch_elf.h" static unsigned char op_to_cfi_reg[][2] = { {CFI_AX, CFI_R8}, @@ -455,6 +456,11 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; + case 0xcc: + /* int3 */ + *type = INSN_TRAP; + break; + case 0xe3: /* jecxz/jrcxz */ *type = INSN_JUMP_CONDITIONAL; @@ -563,8 +569,8 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state) state->cfa.offset = 8; /* initial RA (return address) */ - state->regs[16].base = CFI_CFA; - state->regs[16].offset = -8; + state->regs[CFI_RA].base = CFI_CFA; + state->regs[CFI_RA].offset = -8; } const char *arch_nop_insn(int len) @@ -585,34 +591,52 @@ const char *arch_nop_insn(int len) return nops[len-1]; } -int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) -{ - struct cfi_reg *cfa = &insn->cfi.cfa; +#define BYTE_RET 0xC3 +const char *arch_ret_insn(int len) +{ + static const char ret[5][5] = { + { BYTE_RET }, + { BYTE_RET, 0xcc }, + { BYTE_RET, 0xcc, 0x90 }, + { BYTE_RET, 0xcc, 0x66, 0x90 }, + { BYTE_RET, 0xcc, 0x0f, 0x1f, 0x00 }, + }; + + if (len < 1 || len > 5) { + WARN("invalid RET size: %d\n", len); + return NULL; + } + + return ret[len-1]; +} + +int arch_decode_hint_reg(u8 sp_reg, int *base) +{ switch (sp_reg) { case ORC_REG_UNDEFINED: - cfa->base = CFI_UNDEFINED; + *base = CFI_UNDEFINED; break; case ORC_REG_SP: - cfa->base = CFI_SP; + *base = CFI_SP; break; case ORC_REG_BP: - cfa->base = CFI_BP; + *base = CFI_BP; break; case ORC_REG_SP_INDIRECT: - cfa->base = CFI_SP_INDIRECT; + *base = CFI_SP_INDIRECT; break; case ORC_REG_R10: - cfa->base = CFI_R10; + *base = CFI_R10; break; case ORC_REG_R13: - cfa->base = CFI_R13; + *base = CFI_R13; break; case ORC_REG_DI: - cfa->base = CFI_DI; + *base = CFI_DI; break; case ORC_REG_DX: - cfa->base = CFI_DX; + *base = CFI_DX; break; default: return -1; @@ -620,3 +644,13 @@ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) return 0; } + +bool arch_is_retpoline(struct symbol *sym) +{ + return !strncmp(sym->name, "__x86_indirect_", 15); +} + +bool arch_is_rethunk(struct symbol *sym) +{ + return !strcmp(sym->name, "__x86_return_thunk"); +} diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch_special.h index d818b2bffa02..14271cca0c74 100644 --- a/tools/objtool/arch/x86/include/arch_special.h +++ b/tools/objtool/arch/x86/include/arch_special.h @@ -10,7 +10,7 @@ #define JUMP_ORIG_OFFSET 0 #define JUMP_NEW_OFFSET 4 -#define ALT_ENTRY_SIZE 13 +#define ALT_ENTRY_SIZE 12 #define ALT_ORIG_OFFSET 0 #define ALT_NEW_OFFSET 4 #define ALT_FEATURE_OFFSET 8 diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index b84cdc72b51f..5b2f3a09c326 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -18,7 +18,8 @@ #include "builtin.h" #include "objtool.h" -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + validate_dup, vmlinux, mcount, noinstr, sls, unret, rethunk; static const char * const check_usage[] = { "objtool check [] file.o", @@ -29,6 +30,8 @@ const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), + OPT_BOOLEAN(0, "rethunk", &rethunk, "validate and annotate rethunk usage"), + OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), @@ -37,6 +40,7 @@ const struct option check_options[] = { OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"), OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), + OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"), OPT_END(), }; diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 7b31121fa60b..508bdf6ae8dc 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -51,11 +51,7 @@ int cmd_orc(int argc, const char **argv) if (list_empty(&file->insn_list)) return 0; - ret = create_orc(file); - if (ret) - return ret; - - ret = create_orc_sections(file); + ret = orc_create(file); if (ret) return ret; diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index 2502bb27de17..94e653d4d230 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -8,7 +8,8 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + validate_dup, vmlinux, mcount, noinstr, sls, unret, rethunk; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h index c7c59c6a44ee..f579802d7ec2 100644 --- a/tools/objtool/cfi.h +++ b/tools/objtool/cfi.h @@ -7,6 +7,7 @@ #define _OBJTOOL_CFI_H #include "cfi_regs.h" +#include #define CFI_UNDEFINED -1 #define CFI_CFA -2 @@ -24,6 +25,7 @@ struct cfi_init_state { }; struct cfi_state { + struct hlist_node hash; /* must be first, cficmp() */ struct cfi_reg regs[CFI_NUM_REGS]; struct cfi_reg vals[CFI_NUM_REGS]; struct cfi_reg cfa; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4c114338b75b..836c9195210e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -5,6 +5,8 @@ #include #include +#include +#include #include "builtin.h" #include "cfi.h" @@ -19,15 +21,17 @@ #include #include -#define FAKE_JUMP_OFFSET -1 - struct alternative { struct list_head list; struct instruction *insn; bool skip_orig; }; -struct cfi_init_state initial_func_cfi; +static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache; + +static struct cfi_init_state initial_func_cfi; +static struct cfi_state init_cfi; +static struct cfi_state func_cfi; struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset) @@ -109,17 +113,34 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, for (insn = next_insn_same_sec(file, insn); insn; \ insn = next_insn_same_sec(file, insn)) +static bool is_jump_table_jump(struct instruction *insn) +{ + struct alt_group *alt_group = insn->alt_group; + + if (insn->jump_table) + return true; + + /* Retpoline alternative for a jump table? */ + return alt_group && alt_group->orig_group && + alt_group->orig_group->first_insn->jump_table; +} + static bool is_sibling_call(struct instruction *insn) { - /* An indirect jump is either a sibling call or a jump to a table. */ - if (insn->type == INSN_JUMP_DYNAMIC) - return list_empty(&insn->alts); - - if (!is_static_jump(insn)) + /* + * Assume only ELF functions can make sibling calls. This ensures + * sibling call detection consistency between vmlinux.o and individual + * objects. + */ + if (!insn->func) return false; + /* An indirect jump is either a sibling call or a jump to a table. */ + if (insn->type == INSN_JUMP_DYNAMIC) + return !is_jump_table_jump(insn); + /* add_jump_destinations() sets insn->call_dest for sibling calls. */ - return !!insn->call_dest; + return (is_static_jump(insn) && insn->call_dest); } /* @@ -250,6 +271,78 @@ static void init_insn_state(struct insn_state *state, struct section *sec) state->noinstr = sec->noinstr; } +static struct cfi_state *cfi_alloc(void) +{ + struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1); + if (!cfi) { + WARN("calloc failed"); + exit(1); + } + nr_cfi++; + return cfi; +} + +static int cfi_bits; +static struct hlist_head *cfi_hash; + +static inline bool cficmp(struct cfi_state *cfi1, struct cfi_state *cfi2) +{ + return memcmp((void *)cfi1 + sizeof(cfi1->hash), + (void *)cfi2 + sizeof(cfi2->hash), + sizeof(struct cfi_state) - sizeof(struct hlist_node)); +} + +static inline u32 cfi_key(struct cfi_state *cfi) +{ + return jhash((void *)cfi + sizeof(cfi->hash), + sizeof(*cfi) - sizeof(cfi->hash), 0); +} + +static struct cfi_state *cfi_hash_find_or_add(struct cfi_state *cfi) +{ + struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; + struct cfi_state *obj; + + hlist_for_each_entry(obj, head, hash) { + if (!cficmp(cfi, obj)) { + nr_cfi_cache++; + return obj; + } + } + + obj = cfi_alloc(); + *obj = *cfi; + hlist_add_head(&obj->hash, head); + + return obj; +} + +static void cfi_hash_add(struct cfi_state *cfi) +{ + struct hlist_head *head = &cfi_hash[hash_min(cfi_key(cfi), cfi_bits)]; + + hlist_add_head(&cfi->hash, head); +} + +static void *cfi_hash_alloc(void) +{ + cfi_bits = vmlinux ? ELF_HASH_BITS - 3 : 13; + cfi_hash = mmap(NULL, sizeof(struct hlist_head) << cfi_bits, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (cfi_hash == (void *)-1L) { + WARN("mmap fail cfi_hash"); + cfi_hash = NULL; + } else if (stats) { + printf("cfi_bits: %d\n", cfi_bits); + } + + return cfi_hash; +} + +static unsigned long nr_insns; +static unsigned long nr_insns_visited; + /* * Call the arch-specific instruction decoder for all the instructions and add * them to the global instruction list. @@ -260,7 +353,6 @@ static int decode_instructions(struct objtool_file *file) struct symbol *func; unsigned long offset; struct instruction *insn; - unsigned long nr_insns = 0; int ret; for_each_sec(file, sec) { @@ -274,7 +366,8 @@ static int decode_instructions(struct objtool_file *file) sec->text = true; if (!strcmp(sec->name, ".noinstr.text") || - !strcmp(sec->name, ".entry.text")) + !strcmp(sec->name, ".entry.text") || + !strncmp(sec->name, ".text.__x86.", 12)) sec->noinstr = true; for (offset = 0; offset < sec->len; offset += insn->len) { @@ -286,7 +379,6 @@ static int decode_instructions(struct objtool_file *file) memset(insn, 0, sizeof(*insn)); INIT_LIST_HEAD(&insn->alts); INIT_LIST_HEAD(&insn->stack_ops); - init_cfi_state(&insn->cfi); insn->sec = sec; insn->offset = offset; @@ -377,12 +469,12 @@ static int add_dead_ends(struct objtool_file *file) else if (reloc->addend == reloc->sym->sec->len) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { - WARN("can't find unreachable insn at %s+0x%x", + WARN("can't find unreachable insn at %s+0x%" PRIx64, reloc->sym->sec->name, reloc->addend); return -1; } } else { - WARN("can't find unreachable insn at %s+0x%x", + WARN("can't find unreachable insn at %s+0x%" PRIx64, reloc->sym->sec->name, reloc->addend); return -1; } @@ -412,12 +504,12 @@ static int add_dead_ends(struct objtool_file *file) else if (reloc->addend == reloc->sym->sec->len) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { - WARN("can't find reachable insn at %s+0x%x", + WARN("can't find reachable insn at %s+0x%" PRIx64, reloc->sym->sec->name, reloc->addend); return -1; } } else { - WARN("can't find reachable insn at %s+0x%x", + WARN("can't find reachable insn at %s+0x%" PRIx64, reloc->sym->sec->name, reloc->addend); return -1; } @@ -430,8 +522,7 @@ static int add_dead_ends(struct objtool_file *file) static int create_static_call_sections(struct objtool_file *file) { - struct section *sec, *reloc_sec; - struct reloc *reloc; + struct section *sec; struct static_call_site *site; struct instruction *insn; struct symbol *key_sym; @@ -449,7 +540,7 @@ static int create_static_call_sections(struct objtool_file *file) return 0; idx = 0; - list_for_each_entry(insn, &file->static_call_list, static_call_node) + list_for_each_entry(insn, &file->static_call_list, call_node) idx++; sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, @@ -457,36 +548,18 @@ static int create_static_call_sections(struct objtool_file *file) if (!sec) return -1; - reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!reloc_sec) - return -1; - idx = 0; - list_for_each_entry(insn, &file->static_call_list, static_call_node) { + list_for_each_entry(insn, &file->static_call_list, call_node) { site = (struct static_call_site *)sec->data->d_buf + idx; memset(site, 0, sizeof(struct static_call_site)); /* populate reloc for 'addr' */ - reloc = malloc(sizeof(*reloc)); - - if (!reloc) { - perror("malloc"); + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(struct static_call_site), + R_X86_64_PC32, + insn->sec, insn->offset)) return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); - if (!reloc->sym) { - WARN_FUNC("static call tramp: missing containing symbol", - insn->sec, insn->offset); - return -1; - } - - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site); - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); /* find key symbol */ key_name = strdup(insn->call_dest->name); @@ -523,32 +596,113 @@ static int create_static_call_sections(struct objtool_file *file) free(key_name); /* populate reloc for 'key' */ - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); + if (elf_add_reloc(file->elf, sec, + idx * sizeof(struct static_call_site) + 4, + R_X86_64_PC32, key_sym, + is_sibling_call(insn) * STATIC_CALL_SITE_TAIL)) return -1; - } - memset(reloc, 0, sizeof(*reloc)); - reloc->sym = key_sym; - reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site) + 4; - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); idx++; } - if (elf_rebuild_reloc_section(file->elf, reloc_sec)) + return 0; +} + +static int create_retpoline_sites_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + int idx; + + sec = find_section_by_name(file->elf, ".retpoline_sites"); + if (sec) { + WARN("file already has .retpoline_sites, skipping"); + return 0; + } + + idx = 0; + list_for_each_entry(insn, &file->retpoline_call_list, call_node) + idx++; + + if (!idx) + return 0; + + sec = elf_create_section(file->elf, ".retpoline_sites", 0, + sizeof(int), idx); + if (!sec) { + WARN("elf_create_section: .retpoline_sites"); return -1; + } + + idx = 0; + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + + int *site = (int *)sec->data->d_buf + idx; + *site = 0; + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(int), + R_X86_64_PC32, + insn->sec, insn->offset)) { + WARN("elf_add_reloc_to_insn: .retpoline_sites"); + return -1; + } + + idx++; + } + + return 0; +} + +static int create_return_sites_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + int idx; + + sec = find_section_by_name(file->elf, ".return_sites"); + if (sec) { + WARN("file already has .return_sites, skipping"); + return 0; + } + + idx = 0; + list_for_each_entry(insn, &file->return_thunk_list, call_node) + idx++; + + if (!idx) + return 0; + + sec = elf_create_section(file->elf, ".return_sites", 0, + sizeof(int), idx); + if (!sec) { + WARN("elf_create_section: .return_sites"); + return -1; + } + + idx = 0; + list_for_each_entry(insn, &file->return_thunk_list, call_node) { + + int *site = (int *)sec->data->d_buf + idx; + *site = 0; + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(int), + R_X86_64_PC32, + insn->sec, insn->offset)) { + WARN("elf_add_reloc_to_insn: .return_sites"); + return -1; + } + + idx++; + } return 0; } static int create_mcount_loc_sections(struct objtool_file *file) { - struct section *sec, *reloc_sec; - struct reloc *reloc; + struct section *sec; unsigned long *loc; struct instruction *insn; int idx; @@ -571,49 +725,21 @@ static int create_mcount_loc_sections(struct objtool_file *file) if (!sec) return -1; - reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!reloc_sec) - return -1; - idx = 0; list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) { loc = (unsigned long *)sec->data->d_buf + idx; memset(loc, 0, sizeof(unsigned long)); - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(unsigned long), + R_X86_64_64, + insn->sec, insn->offset)) return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - if (insn->sec->sym) { - reloc->sym = insn->sec->sym; - reloc->addend = insn->offset; - } else { - reloc->sym = find_symbol_containing(insn->sec, insn->offset); - - if (!reloc->sym) { - WARN("missing symbol for insn at offset 0x%lx\n", - insn->offset); - return -1; - } - - reloc->addend = insn->offset - reloc->sym->offset; - } - - reloc->type = R_X86_64_64; - reloc->offset = idx * sizeof(unsigned long); - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); idx++; } - if (elf_rebuild_reloc_section(file->elf, reloc_sec)) - return -1; - return 0; } @@ -847,6 +973,172 @@ static int add_ignore_alternatives(struct objtool_file *file) return 0; } +__weak bool arch_is_retpoline(struct symbol *sym) +{ + return false; +} + +__weak bool arch_is_rethunk(struct symbol *sym) +{ + return false; +} + +#define NEGATIVE_RELOC ((void *)-1L) + +static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) +{ + if (insn->reloc == NEGATIVE_RELOC) + return NULL; + + if (!insn->reloc) { + insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); + if (!insn->reloc) { + insn->reloc = NEGATIVE_RELOC; + return NULL; + } + } + + return insn->reloc; +} + +static void remove_insn_ops(struct instruction *insn) +{ + struct stack_op *op, *tmp; + + list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) { + list_del(&op->list); + free(op); + } +} + +static void annotate_call_site(struct objtool_file *file, + struct instruction *insn, bool sibling) +{ + struct reloc *reloc = insn_reloc(file, insn); + struct symbol *sym = insn->call_dest; + + if (!sym) + sym = reloc->sym; + + /* + * Alternative replacement code is just template code which is + * sometimes copied to the original instruction. For now, don't + * annotate it. (In the future we might consider annotating the + * original instruction if/when it ever makes sense to do so.) + */ + if (!strcmp(insn->sec->name, ".altinstr_replacement")) + return; + + if (sym->static_call_tramp) { + list_add_tail(&insn->call_node, &file->static_call_list); + return; + } + + if (sym->retpoline_thunk) { + list_add_tail(&insn->call_node, &file->retpoline_call_list); + return; + } + + /* + * Many compilers cannot disable KCOV with a function attribute + * so they need a little help, NOP out any KCOV calls from noinstr + * text. + */ + if (insn->sec->noinstr && sym->kcov) { + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); + } + + elf_write_insn(file->elf, insn->sec, + insn->offset, insn->len, + sibling ? arch_ret_insn(insn->len) + : arch_nop_insn(insn->len)); + + insn->type = sibling ? INSN_RETURN : INSN_NOP; + + if (sibling) { + /* + * We've replaced the tail-call JMP insn by two new + * insn: RET; INT3, except we only have a single struct + * insn here. Mark it retpoline_safe to avoid the SLS + * warning, instead of adding another insn. + */ + insn->retpoline_safe = true; + } + + return; + } +} + +static void add_call_dest(struct objtool_file *file, struct instruction *insn, + struct symbol *dest, bool sibling) +{ + insn->call_dest = dest; + if (!dest) + return; + + /* + * Whatever stack impact regular CALLs have, should be undone + * by the RETURN of the called function. + * + * Annotated intra-function calls retain the stack_ops but + * are converted to JUMP, see read_intra_function_calls(). + */ + remove_insn_ops(insn); + + annotate_call_site(file, insn, sibling); +} + +static void add_retpoline_call(struct objtool_file *file, struct instruction *insn) +{ + /* + * Retpoline calls/jumps are really dynamic calls/jumps in disguise, + * so convert them accordingly. + */ + switch (insn->type) { + case INSN_CALL: + insn->type = INSN_CALL_DYNAMIC; + break; + case INSN_JUMP_UNCONDITIONAL: + insn->type = INSN_JUMP_DYNAMIC; + break; + case INSN_JUMP_CONDITIONAL: + insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; + break; + default: + return; + } + + insn->retpoline_safe = true; + + /* + * Whatever stack impact regular CALLs have, should be undone + * by the RETURN of the called function. + * + * Annotated intra-function calls retain the stack_ops but + * are converted to JUMP, see read_intra_function_calls(). + */ + remove_insn_ops(insn); + + annotate_call_site(file, insn, false); +} + +static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add) +{ + /* + * Return thunk tail calls are really just returns in disguise, + * so convert them accordingly. + */ + insn->type = INSN_RETURN; + insn->retpoline_safe = true; + + /* Skip the non-text sections, specially .discard ones */ + if (add && insn->sec->text) + list_add_tail(&insn->call_node, &file->return_thunk_list); +} + /* * CONFIG_CFI_CLANG: Check if the section is a CFI jump table or a * compiler-generated CFI handler. @@ -892,41 +1184,29 @@ static int add_jump_destinations(struct objtool_file *file) if (!is_static_jump(insn)) continue; - if (insn->offset == FAKE_JUMP_OFFSET) - continue; - - reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + reloc = insn_reloc(file, insn); if (!reloc) { dest_sec = insn->sec; dest_off = arch_jump_destination(insn); } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); + } else if (reloc->sym->retpoline_thunk) { + add_retpoline_call(file, insn); + continue; + } else if (reloc->sym->return_thunk) { + add_return_call(file, insn, true); + continue; + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ + add_call_dest(file, insn, reloc->sym, true); + continue; } else if (reloc->sym->sec->idx) { dest_sec = reloc->sym->sec; dest_off = reloc->sym->sym.st_value + arch_dest_reloc_offset(reloc->addend); - } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) || - !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) { - /* - * Retpoline jumps are really dynamic jumps in - * disguise, so convert them accordingly. - */ - if (insn->type == INSN_JUMP_UNCONDITIONAL) - insn->type = INSN_JUMP_DYNAMIC; - else - insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; - - insn->retpoline_safe = true; - continue; } else { - /* external sibling call */ - insn->call_dest = reloc->sym; - if (insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, - &file->static_call_list); - } + /* non-func asm code jumping to another file */ continue; } @@ -936,6 +1216,7 @@ static int add_jump_destinations(struct objtool_file *file) insn->jump_dest = find_last_insn(file, dest_sec); if (!insn->jump_dest) { + struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off); /* * This is a special case where an alt instruction @@ -948,6 +1229,19 @@ static int add_jump_destinations(struct objtool_file *file) if (is_cfi_section(insn->sec)) continue; + /* + * This is a special case for zen_untrain_ret(). + * It jumps to __x86_return_thunk(), but objtool + * can't find the thunk's starting RET + * instruction, because the RET is also in the + * middle of another instruction. Objtool only + * knows about the outer instruction. + */ + if (sym && sym->return_thunk) { + add_return_call(file, insn, false); + continue; + } + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", insn->sec, insn->offset, dest_sec->name, dest_off); @@ -982,13 +1276,8 @@ static int add_jump_destinations(struct objtool_file *file) } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && insn->jump_dest->offset == insn->jump_dest->func->offset) { - - /* internal sibling call */ - insn->call_dest = insn->jump_dest->func; - if (insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, - &file->static_call_list); - } + /* internal sibling call (without reloc) */ + add_call_dest(file, insn, insn->jump_dest->func, true); } } } @@ -996,16 +1285,6 @@ static int add_jump_destinations(struct objtool_file *file) return 0; } -static void remove_insn_ops(struct instruction *insn) -{ - struct stack_op *op, *tmp; - - list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) { - list_del(&op->list); - free(op); - } -} - static struct symbol *find_call_destination(struct section *sec, unsigned long offset) { struct symbol *call_dest; @@ -1024,17 +1303,19 @@ static int add_call_destinations(struct objtool_file *file) { struct instruction *insn; unsigned long dest_off; + struct symbol *dest; struct reloc *reloc; for_each_insn(file, insn) { if (insn->type != INSN_CALL) continue; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + reloc = insn_reloc(file, insn); if (!reloc) { dest_off = arch_jump_destination(insn); - insn->call_dest = find_call_destination(insn->sec, dest_off); + dest = find_call_destination(insn->sec, dest_off); + + add_call_dest(file, insn, dest, false); if (insn->ignore) continue; @@ -1052,9 +1333,8 @@ static int add_call_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_off = arch_dest_reloc_offset(reloc->addend); - insn->call_dest = find_call_destination(reloc->sym->sec, - dest_off); - if (!insn->call_dest) { + dest = find_call_destination(reloc->sym->sec, dest_off); + if (!dest) { if (is_cfi_section(reloc->sym->sec)) continue; @@ -1064,129 +1344,96 @@ static int add_call_destinations(struct objtool_file *file) dest_off); return -1; } + + add_call_dest(file, insn, dest, false); + + } else if (reloc->sym->retpoline_thunk) { + add_retpoline_call(file, insn); + } else - insn->call_dest = reloc->sym; - - if (insn->call_dest && insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, - &file->static_call_list); - } - - /* - * Many compilers cannot disable KCOV with a function attribute - * so they need a little help, NOP out any KCOV calls from noinstr - * text. - */ - if (insn->sec->noinstr && - !strncmp(insn->call_dest->name, "__sanitizer_cov_", 16)) { - if (reloc) { - reloc->type = R_NONE; - elf_write_reloc(file->elf, reloc); - } - - elf_write_insn(file->elf, insn->sec, - insn->offset, insn->len, - arch_nop_insn(insn->len)); - insn->type = INSN_NOP; - } - - if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) { - if (reloc) { - reloc->type = R_NONE; - elf_write_reloc(file->elf, reloc); - } - - elf_write_insn(file->elf, insn->sec, - insn->offset, insn->len, - arch_nop_insn(insn->len)); - - insn->type = INSN_NOP; - - list_add_tail(&insn->mcount_loc_node, - &file->mcount_loc_list); - } - - /* - * Whatever stack impact regular CALLs have, should be undone - * by the RETURN of the called function. - * - * Annotated intra-function calls retain the stack_ops but - * are converted to JUMP, see read_intra_function_calls(). - */ - remove_insn_ops(insn); + add_call_dest(file, insn, reloc->sym, false); } return 0; } /* - * The .alternatives section requires some extra special care, over and above - * what other special sections require: - * - * 1. Because alternatives are patched in-place, we need to insert a fake jump - * instruction at the end so that validate_branch() skips all the original - * replaced instructions when validating the new instruction path. - * - * 2. An added wrinkle is that the new instruction length might be zero. In - * that case the old instructions are replaced with noops. We simulate that - * by creating a fake jump as the only new instruction. - * - * 3. In some cases, the alternative section includes an instruction which - * conditionally jumps to the _end_ of the entry. We have to modify these - * jumps' destinations to point back to .text rather than the end of the - * entry in .altinstr_replacement. + * The .alternatives section requires some extra special care over and above + * other special sections because alternatives are patched in place. */ static int handle_group_alt(struct objtool_file *file, struct special_alt *special_alt, struct instruction *orig_insn, struct instruction **new_insn) { - static unsigned int alt_group_next_index = 1; - struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; - unsigned int alt_group = alt_group_next_index++; + struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL; + struct alt_group *orig_alt_group, *new_alt_group; unsigned long dest_off; + + orig_alt_group = malloc(sizeof(*orig_alt_group)); + if (!orig_alt_group) { + WARN("malloc failed"); + return -1; + } + orig_alt_group->cfi = calloc(special_alt->orig_len, + sizeof(struct cfi_state *)); + if (!orig_alt_group->cfi) { + WARN("calloc failed"); + return -1; + } + last_orig_insn = NULL; insn = orig_insn; sec_for_each_insn_from(file, insn) { if (insn->offset >= special_alt->orig_off + special_alt->orig_len) break; - insn->alt_group = alt_group; + insn->alt_group = orig_alt_group; last_orig_insn = insn; } + orig_alt_group->orig_group = NULL; + orig_alt_group->first_insn = orig_insn; + orig_alt_group->last_insn = last_orig_insn; - if (next_insn_same_sec(file, last_orig_insn)) { - fake_jump = malloc(sizeof(*fake_jump)); - if (!fake_jump) { + + new_alt_group = malloc(sizeof(*new_alt_group)); + if (!new_alt_group) { + WARN("malloc failed"); + return -1; + } + + if (special_alt->new_len < special_alt->orig_len) { + /* + * Insert a fake nop at the end to make the replacement + * alt_group the same size as the original. This is needed to + * allow propagate_alt_cfi() to do its magic. When the last + * instruction affects the stack, the instruction after it (the + * nop) will propagate the new state to the shared CFI array. + */ + nop = malloc(sizeof(*nop)); + if (!nop) { WARN("malloc failed"); return -1; } - memset(fake_jump, 0, sizeof(*fake_jump)); - INIT_LIST_HEAD(&fake_jump->alts); - INIT_LIST_HEAD(&fake_jump->stack_ops); - init_cfi_state(&fake_jump->cfi); + memset(nop, 0, sizeof(*nop)); + INIT_LIST_HEAD(&nop->alts); + INIT_LIST_HEAD(&nop->stack_ops); - fake_jump->sec = special_alt->new_sec; - fake_jump->offset = FAKE_JUMP_OFFSET; - fake_jump->type = INSN_JUMP_UNCONDITIONAL; - fake_jump->jump_dest = list_next_entry(last_orig_insn, list); - fake_jump->func = orig_insn->func; + nop->sec = special_alt->new_sec; + nop->offset = special_alt->new_off + special_alt->new_len; + nop->len = special_alt->orig_len - special_alt->new_len; + nop->type = INSN_NOP; + nop->func = orig_insn->func; + nop->alt_group = new_alt_group; + nop->ignore = orig_insn->ignore_alts; } if (!special_alt->new_len) { - if (!fake_jump) { - WARN("%s: empty alternative at end of section", - special_alt->orig_sec->name); - return -1; - } - - *new_insn = fake_jump; - return 0; + *new_insn = nop; + goto end; } - last_new_insn = NULL; - alt_group = alt_group_next_index++; insn = *new_insn; sec_for_each_insn_from(file, insn) { struct reloc *alt_reloc; @@ -1198,7 +1445,7 @@ static int handle_group_alt(struct objtool_file *file, insn->ignore = orig_insn->ignore_alts; insn->func = orig_insn->func; - insn->alt_group = alt_group; + insn->alt_group = new_alt_group; /* * Since alternative replacement code is copy/pasted by the @@ -1208,8 +1455,7 @@ static int handle_group_alt(struct objtool_file *file, * alternatives code can adjust the relative offsets * accordingly. */ - alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + alt_reloc = insn_reloc(file, insn); if (alt_reloc && !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { @@ -1225,14 +1471,8 @@ static int handle_group_alt(struct objtool_file *file, continue; dest_off = arch_jump_destination(insn); - if (dest_off == special_alt->new_off + special_alt->new_len) { - if (!fake_jump) { - WARN("%s: alternative jump to end of section", - special_alt->orig_sec->name); - return -1; - } - insn->jump_dest = fake_jump; - } + if (dest_off == special_alt->new_off + special_alt->new_len) + insn->jump_dest = next_insn_same_sec(file, last_orig_insn); if (!insn->jump_dest) { WARN_FUNC("can't find alternative jump destination", @@ -1247,9 +1487,13 @@ static int handle_group_alt(struct objtool_file *file, return -1; } - if (fake_jump) - list_add(&fake_jump->list, &last_new_insn->list); - + if (nop) + list_add(&nop->list, &last_new_insn->list); +end: + new_alt_group->orig_group = orig_alt_group; + new_alt_group->first_insn = *new_insn; + new_alt_group->last_insn = nop ? : last_new_insn; + new_alt_group->cfi = orig_alt_group->cfi; return 0; } @@ -1541,13 +1785,21 @@ static int add_jump_table_alts(struct objtool_file *file) return 0; } +static void set_func_state(struct cfi_state *state) +{ + state->cfa = initial_func_cfi.cfa; + memcpy(&state->regs, &initial_func_cfi.regs, + CFI_NUM_REGS * sizeof(struct cfi_reg)); + state->stack_size = initial_func_cfi.cfa.offset; +} + static int read_unwind_hints(struct objtool_file *file) { + struct cfi_state cfi = init_cfi; struct section *sec, *relocsec; - struct reloc *reloc; struct unwind_hint *hint; struct instruction *insn; - struct cfi_reg *cfa; + struct reloc *reloc; int i; sec = find_section_by_name(file->elf, ".discard.unwind_hints"); @@ -1582,24 +1834,51 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - cfa = &insn->cfi.cfa; + insn->hint = true; - if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) { - insn->ret_offset = hint->sp_offset; + if (hint->type == UNWIND_HINT_TYPE_SAVE) { + insn->hint = false; + insn->save = true; continue; } - insn->hint = true; + if (hint->type == UNWIND_HINT_TYPE_RESTORE) { + insn->restore = true; + continue; + } - if (arch_decode_hint_reg(insn, hint->sp_reg)) { + if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); + + if (sym && sym->bind == STB_GLOBAL) { + insn->entry = 1; + } + } + + if (hint->type == UNWIND_HINT_TYPE_ENTRY) { + hint->type = UNWIND_HINT_TYPE_CALL; + insn->entry = 1; + } + + if (hint->type == UNWIND_HINT_TYPE_FUNC) { + insn->cfi = &func_cfi; + continue; + } + + if (insn->cfi) + cfi = *(insn->cfi); + + if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) { WARN_FUNC("unsupported unwind_hint sp base reg %d", insn->sec, insn->offset, hint->sp_reg); return -1; } - cfa->offset = hint->sp_offset; - insn->cfi.type = hint->type; - insn->cfi.end = hint->end; + cfi.cfa.offset = hint->sp_offset; + cfi.type = hint->type; + cfi.end = hint->end; + + insn->cfi = cfi_hash_find_or_add(&cfi); } return 0; @@ -1628,8 +1907,10 @@ static int read_retpoline_hints(struct objtool_file *file) } if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) { - WARN_FUNC("retpoline_safe hint not an indirect jump/call", + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN && + insn->type != INSN_NOP) { + WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop", insn->sec, insn->offset); return -1; } @@ -1738,17 +2019,31 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } -static int read_static_call_tramps(struct objtool_file *file) +static int classify_symbols(struct objtool_file *file) { struct section *sec; struct symbol *func; for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->bind == STB_GLOBAL && - !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + if (func->bind != STB_GLOBAL) + continue; + + if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, strlen(STATIC_CALL_TRAMP_PREFIX_STR))) func->static_call_tramp = true; + + if (arch_is_retpoline(func)) + func->retpoline_thunk = true; + + if (arch_is_rethunk(func)) + func->return_thunk = true; + + if (!strcmp(func->name, "__fentry__")) + func->fentry = true; + + if (!strncmp(func->name, "__sanitizer_cov_", 16)) + func->kcov = true; } } @@ -1806,10 +2101,14 @@ static int decode_sections(struct objtool_file *file) /* * Must be before add_{jump_call}_destination. */ - ret = read_static_call_tramps(file); + ret = classify_symbols(file); if (ret) return ret; + /* + * Must be before add_special_section_alts() as that depends on + * jump_dest being set. + */ ret = add_jump_destinations(file); if (ret) return ret; @@ -1851,9 +2150,9 @@ static int decode_sections(struct objtool_file *file) static bool is_fentry_call(struct instruction *insn) { - if (insn->type == INSN_CALL && insn->call_dest && - insn->call_dest->type == STT_NOTYPE && - !strcmp(insn->call_dest->name, "__fentry__")) + if (insn->type == INSN_CALL && + insn->call_dest && + insn->call_dest->fentry) return true; return false; @@ -1861,27 +2160,18 @@ static bool is_fentry_call(struct instruction *insn) static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state) { - u8 ret_offset = insn->ret_offset; struct cfi_state *cfi = &state->cfi; int i; if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap) return true; - if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset) + if (cfi->cfa.offset != initial_func_cfi.cfa.offset) return true; - if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset) + if (cfi->stack_size != initial_func_cfi.cfa.offset) return true; - /* - * If there is a ret offset hint then don't check registers - * because a callee-saved register might have been pushed on - * the stack. - */ - if (ret_offset) - return false; - for (i = 0; i < CFI_NUM_REGS; i++) { if (cfi->regs[i].base != initial_func_cfi.regs[i].base || cfi->regs[i].offset != initial_func_cfi.regs[i].offset) @@ -2350,22 +2640,52 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, return 0; } +/* + * The stack layouts of alternatives instructions can sometimes diverge when + * they have stack modifications. That's fine as long as the potential stack + * layouts don't conflict at any given potential instruction boundary. + * + * Flatten the CFIs of the different alternative code streams (both original + * and replacement) into a single shared CFI array which can be used to detect + * conflicts and nicely feed a linear array of ORC entries to the unwinder. + */ +static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn) +{ + struct cfi_state **alt_cfi; + int group_off; + + if (!insn->alt_group) + return 0; + + if (!insn->cfi) { + WARN("CFI missing"); + return -1; + } + + alt_cfi = insn->alt_group->cfi; + group_off = insn->offset - insn->alt_group->first_insn->offset; + + if (!alt_cfi[group_off]) { + alt_cfi[group_off] = insn->cfi; + } else { + if (cficmp(alt_cfi[group_off], insn->cfi)) { + WARN_FUNC("stack layout conflict in alternatives", + insn->sec, insn->offset); + return -1; + } + } + + return 0; +} + static int handle_insn_ops(struct instruction *insn, struct insn_state *state) { struct stack_op *op; list_for_each_entry(op, &insn->stack_ops, list) { - struct cfi_state old_cfi = state->cfi; - int res; - res = update_cfi_state(insn, &state->cfi, op); - if (res) - return res; - - if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) { - WARN_FUNC("alternative modifies stack", insn->sec, insn->offset); - return -1; - } + if (update_cfi_state(insn, &state->cfi, op)) + return 1; if (op->dest.type == OP_DEST_PUSHF) { if (!state->uaccess_stack) { @@ -2394,9 +2714,14 @@ static int handle_insn_ops(struct instruction *insn, struct insn_state *state) static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) { - struct cfi_state *cfi1 = &insn->cfi; + struct cfi_state *cfi1 = insn->cfi; int i; + if (!cfi1) { + WARN("CFI missing"); + return false; + } + if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) { WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", @@ -2555,28 +2880,20 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct return 0; } -/* - * Alternatives should not contain any ORC entries, this in turn means they - * should not contain any CFI ops, which implies all instructions should have - * the same same CFI state. - * - * It is possible to constuct alternatives that have unreachable holes that go - * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED - * states which then results in ORC entries, which we just said we didn't want. - * - * Avoid them by copying the CFI entry of the first instruction into the whole - * alternative. - */ -static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn) +static struct instruction *next_insn_to_validate(struct objtool_file *file, + struct instruction *insn) { - struct instruction *first_insn = insn; - int alt_group = insn->alt_group; + struct alt_group *alt_group = insn->alt_group; - sec_for_each_insn_continue(file, insn) { - if (insn->alt_group != alt_group) - break; - insn->cfi = first_insn->cfi; - } + /* + * Simulate the fact that alternatives are patched in-place. When the + * end of a replacement alt_group is reached, redirect objtool flow to + * the end of the original alt_group. + */ + if (alt_group && insn == alt_group->last_insn && alt_group->orig_group) + return next_insn_same_sec(file, alt_group->orig_group->last_insn); + + return next_insn_same_sec(file, insn); } /* @@ -2589,7 +2906,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state state) { struct alternative *alt; - struct instruction *next_insn; + struct instruction *next_insn, *prev_insn = NULL; struct section *sec; u8 visited; int ret; @@ -2597,7 +2914,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, sec = insn->sec; while (1) { - next_insn = next_insn_same_sec(file, insn); + next_insn = next_insn_to_validate(file, insn); if (file->c_file && func && insn->func && func != insn->func->pfunc) { WARN("%s() falls through to next function %s()", @@ -2611,25 +2928,67 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - visited = 1 << state.uaccess; - if (insn->visited) { + visited = VISITED_BRANCH << state.uaccess; + if (insn->visited & VISITED_BRANCH_MASK) { if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) return 1; if (insn->visited & visited) return 0; + } else { + nr_insns_visited++; } if (state.noinstr) state.instr += insn->instr; - if (insn->hint) - state.cfi = insn->cfi; - else - insn->cfi = state.cfi; + if (insn->hint) { + if (insn->restore) { + struct instruction *save_insn, *i; + + i = insn; + save_insn = NULL; + + sym_for_each_insn_continue_reverse(file, func, i) { + if (i->save) { + save_insn = i; + break; + } + } + + if (!save_insn) { + WARN_FUNC("no corresponding CFI save for CFI restore", + sec, insn->offset); + return 1; + } + + if (!save_insn->visited) { + WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", + sec, insn->offset); + return 1; + } + + insn->cfi = save_insn->cfi; + nr_cfi_reused++; + } + + state.cfi = *insn->cfi; + } else { + /* XXX track if we actually changed state.cfi */ + + if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) { + insn->cfi = prev_insn->cfi; + nr_cfi_reused++; + } else { + insn->cfi = cfi_hash_find_or_add(&state.cfi); + } + } insn->visited |= visited; + if (propagate_alt_cfi(file, insn)) + return 1; + if (!insn->ignore_alts && !list_empty(&insn->alts)) { bool skip_orig = false; @@ -2645,9 +3004,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } } - if (insn->alt_group) - fill_alternative_cfi(file, insn); - if (skip_orig) return 0; } @@ -2658,6 +3014,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: + if (sls && !insn->retpoline_safe && + next_insn && next_insn->type != INSN_TRAP) { + WARN_FUNC("missing int3 after ret", + insn->sec, insn->offset); + } return validate_return(func, insn, &state); case INSN_CALL: @@ -2680,7 +3041,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_JUMP_CONDITIONAL: case INSN_JUMP_UNCONDITIONAL: - if (func && is_sibling_call(insn)) { + if (is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; @@ -2701,8 +3062,15 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: + if (sls && !insn->retpoline_safe && + next_insn && next_insn->type != INSN_TRAP) { + WARN_FUNC("missing int3 after indirect jump", + insn->sec, insn->offset); + } + + /* fallthrough */ case INSN_JUMP_DYNAMIC_CONDITIONAL: - if (func && is_sibling_call(insn)) { + if (is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; @@ -2776,6 +3144,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } + prev_insn = insn; insn = next_insn; } @@ -2815,6 +3184,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) return warnings; } +/* + * Validate rethunk entry constraint: must untrain RET before the first RET. + * + * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes + * before an actual RET instruction. + */ +static int validate_entry(struct objtool_file *file, struct instruction *insn) +{ + struct instruction *next, *dest; + int ret, warnings = 0; + + for (;;) { + next = next_insn_to_validate(file, insn); + + if (insn->visited & VISITED_ENTRY) + return 0; + + insn->visited |= VISITED_ENTRY; + + if (!insn->ignore_alts && !list_empty(&insn->alts)) { + struct alternative *alt; + bool skip_orig = false; + + list_for_each_entry(alt, &insn->alts, list) { + if (alt->skip_orig) + skip_orig = true; + + ret = validate_entry(file, alt->insn); + if (ret) { + if (backtrace) + BT_FUNC("(alt)", insn); + return ret; + } + } + + if (skip_orig) + return 0; + } + + switch (insn->type) { + + case INSN_CALL_DYNAMIC: + case INSN_JUMP_DYNAMIC: + case INSN_JUMP_DYNAMIC_CONDITIONAL: + WARN_FUNC("early indirect call", insn->sec, insn->offset); + return 1; + + case INSN_JUMP_UNCONDITIONAL: + case INSN_JUMP_CONDITIONAL: + if (!is_sibling_call(insn)) { + if (!insn->jump_dest) { + WARN_FUNC("unresolved jump target after linking?!?", + insn->sec, insn->offset); + return -1; + } + ret = validate_entry(file, insn->jump_dest); + if (ret) { + if (backtrace) { + BT_FUNC("(branch%s)", insn, + insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : ""); + } + return ret; + } + + if (insn->type == INSN_JUMP_UNCONDITIONAL) + return 0; + + break; + } + + /* fallthrough */ + case INSN_CALL: + dest = find_insn(file, insn->call_dest->sec, + insn->call_dest->offset); + if (!dest) { + WARN("Unresolved function after linking!?: %s", + insn->call_dest->name); + return -1; + } + + ret = validate_entry(file, dest); + if (ret) { + if (backtrace) + BT_FUNC("(call)", insn); + return ret; + } + /* + * If a call returns without error, it must have seen UNTRAIN_RET. + * Therefore any non-error return is a success. + */ + return 0; + + case INSN_RETURN: + WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset); + return 1; + + case INSN_NOP: + if (insn->retpoline_safe) + return 0; + break; + + default: + break; + } + + if (!next) { + WARN_FUNC("teh end!", insn->sec, insn->offset); + return -1; + } + insn = next; + } + + return warnings; +} + +/* + * Validate that all branches starting at 'insn->entry' encounter UNRET_END + * before RET. + */ +static int validate_unret(struct objtool_file *file) +{ + struct instruction *insn; + int ret, warnings = 0; + + for_each_insn(file, insn) { + if (!insn->entry) + continue; + + ret = validate_entry(file, insn); + if (ret < 0) { + WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset); + return ret; + } + warnings += ret; + } + + return warnings; +} + static int validate_retpoline(struct objtool_file *file) { struct instruction *insn; @@ -2822,7 +3330,8 @@ static int validate_retpoline(struct objtool_file *file) for_each_insn(file, insn) { if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN) continue; if (insn->retpoline_safe) @@ -2837,9 +3346,17 @@ static int validate_retpoline(struct objtool_file *file) if (!strcmp(insn->sec->name, ".init.text") && !module) continue; - WARN_FUNC("indirect %s found in RETPOLINE build", - insn->sec, insn->offset, - insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + if (insn->type == INSN_RETURN) { + if (rethunk) { + WARN_FUNC("'naked' return found in RETHUNK build", + insn->sec, insn->offset); + } else + continue; + } else { + WARN_FUNC("indirect %s found in RETPOLINE build", + insn->sec, insn->offset, + insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + } warnings++; } @@ -2865,7 +3382,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio int i; struct instruction *prev_insn; - if (insn->ignore || insn->type == INSN_NOP) + if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP) return true; /* @@ -2880,9 +3397,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio !strcmp(insn->sec->name, ".altinstr_aux")) return true; - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET) - return true; - if (!insn->func) return false; @@ -2968,10 +3482,7 @@ static int validate_section(struct objtool_file *file, struct section *sec) continue; init_insn_state(&state, sec); - state.cfi.cfa = initial_func_cfi.cfa; - memcpy(&state.cfi.regs, &initial_func_cfi.regs, - CFI_NUM_REGS * sizeof(struct cfi_reg)); - state.cfi.stack_size = initial_func_cfi.cfa.offset; + set_func_state(&state.cfi); warnings += validate_symbol(file, sec, func, &state); } @@ -3037,10 +3548,20 @@ int check(struct objtool_file *file) int ret, warnings = 0; arch_initial_func_cfi_state(&initial_func_cfi); + init_cfi_state(&init_cfi); + init_cfi_state(&func_cfi); + set_func_state(&func_cfi); + + if (!cfi_hash_alloc()) + goto out; + + cfi_hash_add(&init_cfi); + cfi_hash_add(&func_cfi); ret = decode_sections(file); if (ret < 0) goto out; + warnings += ret; if (list_empty(&file->insn_list)) @@ -3072,6 +3593,17 @@ int check(struct objtool_file *file) goto out; warnings += ret; + if (unret) { + /* + * Must be after validate_branch() and friends, it plays + * further games with insn->visited. + */ + ret = validate_unret(file); + if (ret < 0) + return ret; + warnings += ret; + } + if (!warnings) { ret = validate_reachable_instructions(file); if (ret < 0) @@ -3091,6 +3623,27 @@ int check(struct objtool_file *file) warnings += ret; } + if (retpoline) { + ret = create_retpoline_sites_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + + if (rethunk) { + ret = create_return_sites_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + + if (stats) { + printf("nr_insns_visited: %ld\n", nr_insns_visited); + printf("nr_cfi: %ld\n", nr_cfi); + printf("nr_cfi_reused: %ld\n", nr_cfi_reused); + printf("nr_cfi_cache: %ld\n", nr_cfi_cache); + } + out: /* * For now, don't fail the kernel build on fatal warnings. These diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 3a9a3daf6e3d..6daa64d2ee97 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -19,11 +19,28 @@ struct insn_state { s8 instr; }; +struct alt_group { + /* + * Pointer from a replacement group to the original group. NULL if it + * *is* the original group. + */ + struct alt_group *orig_group; + + /* First and last instructions in the group */ + struct instruction *first_insn, *last_insn; + + /* + * Byte-offset-addressed len-sized array of pointers to CFI structs. + * This is shared with the other alt_groups in the same alternative. + */ + struct cfi_state **cfi; +}; + struct instruction { struct list_head list; struct hlist_node hash; - struct list_head static_call_node; struct list_head mcount_loc_node; + struct list_head call_node; struct section *sec; unsigned long offset; unsigned int len; @@ -31,24 +48,28 @@ struct instruction { unsigned long immediate; bool dead_end, ignore, ignore_alts; bool hint; + bool save, restore; bool retpoline_safe; + bool entry; s8 instr; u8 visited; - u8 ret_offset; - int alt_group; + struct alt_group *alt_group; struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; struct reloc *jump_table; + struct reloc *reloc; struct list_head alts; struct symbol *func; struct list_head stack_ops; - struct cfi_state cfi; -#ifdef INSN_USE_ORC - struct orc_entry orc; -#endif + struct cfi_state *cfi; }; +#define VISITED_BRANCH 0x01 +#define VISITED_BRANCH_UACCESS 0x02 +#define VISITED_BRANCH_MASK 0x03 +#define VISITED_ENTRY 0x04 + static inline bool is_static_jump(struct instruction *insn) { return insn->type == INSN_JUMP_CONDITIONAL || diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d8421e1d06be..5aa3b4e76479 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -262,32 +262,6 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns return find_reloc_by_dest_range(elf, sec, offset, 1); } -void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, - struct reloc *reloc) -{ - if (sec->sym) { - reloc->sym = sec->sym; - reloc->addend = offset; - return; - } - - /* - * The Clang assembler strips section symbols, so we have to reference - * the function symbol instead: - */ - reloc->sym = find_symbol_containing(sec, offset); - if (!reloc->sym) { - /* - * Hack alert. This happens when we need to reference the NOP - * pad insn immediately after the function. - */ - reloc->sym = find_symbol_containing(sec, offset - 1); - } - - if (reloc->sym) - reloc->addend = offset - reloc->sym->offset; -} - static int read_sections(struct elf *elf) { Elf_Scn *s = NULL; @@ -367,12 +341,41 @@ static int read_sections(struct elf *elf) return 0; } +static void elf_add_symbol(struct elf *elf, struct symbol *sym) +{ + struct list_head *entry; + struct rb_node *pnode; + + sym->alias = sym; + + sym->type = GELF_ST_TYPE(sym->sym.st_info); + sym->bind = GELF_ST_BIND(sym->sym.st_info); + + sym->offset = sym->sym.st_value; + sym->len = sym->sym.st_size; + + rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset); + pnode = rb_prev(&sym->node); + if (pnode) + entry = &rb_entry(pnode, struct symbol, node)->list; + else + entry = &sym->sec->symbol_list; + list_add(&sym->list, entry); + elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); + elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); + + /* + * Don't store empty STT_NOTYPE symbols in the rbtree. They + * can exist within a function, confusing the sorting. + */ + if (!sym->len) + rb_erase(&sym->node, &sym->sec->symbol_tree); +} + static int read_symbols(struct elf *elf) { struct section *symtab, *symtab_shndx, *sec; struct symbol *sym, *pfunc; - struct list_head *entry; - struct rb_node *pnode; int symbols_nr, i; char *coldstr; Elf_Data *shndx_data = NULL; @@ -400,7 +403,6 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); - sym->alias = sym; sym->idx = i; @@ -417,9 +419,6 @@ static int read_symbols(struct elf *elf) goto err; } - sym->type = GELF_ST_TYPE(sym->sym.st_info); - sym->bind = GELF_ST_BIND(sym->sym.st_info); - if ((sym->sym.st_shndx > SHN_UNDEF && sym->sym.st_shndx < SHN_LORESERVE) || (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) { @@ -432,32 +431,14 @@ static int read_symbols(struct elf *elf) sym->name); goto err; } - if (sym->type == STT_SECTION) { + if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) { sym->name = sym->sec->name; sym->sec->sym = sym; } } else sym->sec = find_section_by_index(elf, 0); - sym->offset = sym->sym.st_value; - sym->len = sym->sym.st_size; - - rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset); - pnode = rb_prev(&sym->node); - if (pnode) - entry = &rb_entry(pnode, struct symbol, node)->list; - else - entry = &sym->sec->symbol_list; - list_add(&sym->list, entry); - elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); - elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); - - /* - * Don't store empty STT_NOTYPE symbols in the rbtree. They - * can exist within a function, confusing the sorting. - */ - if (!sym->len) - rb_erase(&sym->node, &sym->sec->symbol_tree); + elf_add_symbol(elf, sym); } if (stats) @@ -524,12 +505,275 @@ static int read_symbols(struct elf *elf) return -1; } -void elf_add_reloc(struct elf *elf, struct reloc *reloc) -{ - struct section *sec = reloc->sec; +static struct section *elf_create_reloc_section(struct elf *elf, + struct section *base, + int reltype); - list_add_tail(&reloc->list, &sec->reloc_list); +int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, + unsigned int type, struct symbol *sym, s64 addend) +{ + struct reloc *reloc; + + if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA)) + return -1; + + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + + reloc->sec = sec->reloc; + reloc->offset = offset; + reloc->type = type; + reloc->sym = sym; + reloc->addend = addend; + + list_add_tail(&reloc->list, &sec->reloc->reloc_list); elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc)); + + sec->reloc->changed = true; + + return 0; +} + +/* + * Ensure that any reloc section containing references to @sym is marked + * changed such that it will get re-generated in elf_rebuild_reloc_sections() + * with the new symbol index. + */ +static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym) +{ + struct section *sec; + + list_for_each_entry(sec, &elf->sections, list) { + struct reloc *reloc; + + if (sec->changed) + continue; + + list_for_each_entry(reloc, &sec->reloc_list, list) { + if (reloc->sym == sym) { + sec->changed = true; + break; + } + } + } +} + +/* + * The libelf API is terrible; gelf_update_sym*() takes a data block relative + * index value, *NOT* the symbol index. As such, iterate the data blocks and + * adjust index until it fits. + * + * If no data block is found, allow adding a new data block provided the index + * is only one past the end. + */ +static int elf_update_symbol(struct elf *elf, struct section *symtab, + struct section *symtab_shndx, struct symbol *sym) +{ + Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF; + Elf_Data *symtab_data = NULL, *shndx_data = NULL; + Elf64_Xword entsize = symtab->sh.sh_entsize; + int max_idx, idx = sym->idx; + Elf_Scn *s, *t = NULL; + + s = elf_getscn(elf->elf, symtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + if (symtab_shndx) { + t = elf_getscn(elf->elf, symtab_shndx->idx); + if (!t) { + WARN_ELF("elf_getscn"); + return -1; + } + } + + for (;;) { + /* get next data descriptor for the relevant sections */ + symtab_data = elf_getdata(s, symtab_data); + if (t) + shndx_data = elf_getdata(t, shndx_data); + + /* end-of-list */ + if (!symtab_data) { + void *buf; + + if (idx) { + /* we don't do holes in symbol tables */ + WARN("index out of range"); + return -1; + } + + /* if @idx == 0, it's the next contiguous entry, create it */ + symtab_data = elf_newdata(s); + if (t) + shndx_data = elf_newdata(t); + + buf = calloc(1, entsize); + if (!buf) { + WARN("malloc"); + return -1; + } + + symtab_data->d_buf = buf; + symtab_data->d_size = entsize; + symtab_data->d_align = 1; + symtab_data->d_type = ELF_T_SYM; + + symtab->sh.sh_size += entsize; + symtab->changed = true; + + if (t) { + shndx_data->d_buf = &sym->sec->idx; + shndx_data->d_size = sizeof(Elf32_Word); + shndx_data->d_align = sizeof(Elf32_Word); + shndx_data->d_type = ELF_T_WORD; + + symtab_shndx->sh.sh_size += sizeof(Elf32_Word); + symtab_shndx->changed = true; + } + + break; + } + + /* empty blocks should not happen */ + if (!symtab_data->d_size) { + WARN("zero size data"); + return -1; + } + + /* is this the right block? */ + max_idx = symtab_data->d_size / entsize; + if (idx < max_idx) + break; + + /* adjust index and try again */ + idx -= max_idx; + } + + /* something went side-ways */ + if (idx < 0) { + WARN("negative index"); + return -1; + } + + /* setup extended section index magic and write the symbol */ + if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { + sym->sym.st_shndx = shndx; + if (!shndx_data) + shndx = 0; + } else { + sym->sym.st_shndx = SHN_XINDEX; + if (!shndx_data) { + WARN("no .symtab_shndx"); + return -1; + } + } + + if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) { + WARN_ELF("gelf_update_symshndx"); + return -1; + } + + return 0; +} + +static struct symbol * +elf_create_section_symbol(struct elf *elf, struct section *sec) +{ + struct section *symtab, *symtab_shndx; + Elf32_Word first_non_local, new_idx; + struct symbol *sym, *old; + + symtab = find_section_by_name(elf, ".symtab"); + if (symtab) { + symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); + } else { + WARN("no .symtab"); + return NULL; + } + + sym = calloc(1, sizeof(*sym)); + if (!sym) { + perror("malloc"); + return NULL; + } + + sym->name = sec->name; + sym->sec = sec; + + // st_name 0 + sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION); + // st_other 0 + // st_value 0 + // st_size 0 + + /* + * Move the first global symbol, as per sh_info, into a new, higher + * symbol index. This fees up a spot for a new local symbol. + */ + first_non_local = symtab->sh.sh_info; + new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize; + old = find_symbol_by_index(elf, first_non_local); + if (old) { + old->idx = new_idx; + + hlist_del(&old->hash); + elf_hash_add(elf->symbol_hash, &old->hash, old->idx); + + elf_dirty_reloc_sym(elf, old); + + if (elf_update_symbol(elf, symtab, symtab_shndx, old)) { + WARN("elf_update_symbol move"); + return NULL; + } + + new_idx = first_non_local; + } + + sym->idx = new_idx; + if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) { + WARN("elf_update_symbol"); + return NULL; + } + + /* + * Either way, we added a LOCAL symbol. + */ + symtab->sh.sh_info += 1; + + elf_add_symbol(elf, sym); + + return sym; +} + +int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int type, + struct section *insn_sec, unsigned long insn_off) +{ + struct symbol *sym = insn_sec->sym; + int addend = insn_off; + + if (!sym) { + /* + * Due to how weak functions work, we must use section based + * relocations. Symbol based relocations would result in the + * weak and non-weak function annotations being overlaid on the + * non-weak function after linking. + */ + sym = elf_create_section_symbol(elf, insn_sec); + if (!sym) + return -1; + + insn_sec->sym = sym; + } + + return elf_add_reloc(elf, sec, offset, type, sym, addend); } static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx) @@ -609,7 +853,9 @@ static int read_relocs(struct elf *elf) return -1; } - elf_add_reloc(elf, reloc); + list_add_tail(&reloc->list, &sec->reloc_list); + elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc)); + nr_reloc++; } max_reloc = max(max_reloc, nr_reloc); @@ -687,13 +933,49 @@ struct elf *elf_open_read(const char *name, int flags) return NULL; } +static int elf_add_string(struct elf *elf, struct section *strtab, char *str) +{ + Elf_Data *data; + Elf_Scn *s; + int len; + + if (!strtab) + strtab = find_section_by_name(elf, ".strtab"); + if (!strtab) { + WARN("can't find .strtab section"); + return -1; + } + + s = elf_getscn(elf->elf, strtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return -1; + } + + data->d_buf = str; + data->d_size = strlen(str) + 1; + data->d_align = 1; + data->d_type = ELF_T_SYM; + + len = strtab->len; + strtab->len += data->d_size; + strtab->changed = true; + + return len; +} + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr) { struct section *sec, *shstrtab; size_t size = entsize * nr; Elf_Scn *s; - Elf_Data *data; sec = malloc(sizeof(*sec)); if (!sec) { @@ -750,7 +1032,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_addralign = 1; sec->sh.sh_flags = SHF_ALLOC | sh_flags; - /* Add section name to .shstrtab (or .strtab for Clang) */ shstrtab = find_section_by_name(elf, ".shstrtab"); if (!shstrtab) @@ -759,27 +1040,9 @@ struct section *elf_create_section(struct elf *elf, const char *name, WARN("can't find .shstrtab or .strtab section"); return NULL; } - - s = elf_getscn(elf->elf, shstrtab->idx); - if (!s) { - WARN_ELF("elf_getscn"); + sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name); + if (sec->sh.sh_name == -1) return NULL; - } - - data = elf_newdata(s); - if (!data) { - WARN_ELF("elf_newdata"); - return NULL; - } - - data->d_buf = sec->name; - data->d_size = strlen(name) + 1; - data->d_align = 1; - - sec->sh.sh_name = shstrtab->len; - - shstrtab->len += strlen(name) + 1; - shstrtab->changed = true; list_add_tail(&sec->list, &elf->sections); elf_hash_add(elf->section_hash, &sec->hash, sec->idx); @@ -850,7 +1113,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec return sec; } -struct section *elf_create_reloc_section(struct elf *elf, +static struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype) { @@ -920,14 +1183,11 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) return 0; } -int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) +static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) { struct reloc *reloc; int nr; - sec->changed = true; - elf->changed = true; - nr = 0; list_for_each_entry(reloc, &sec->reloc_list, list) nr++; @@ -991,9 +1251,15 @@ int elf_write(struct elf *elf) struct section *sec; Elf_Scn *s; - /* Update section headers for changed sections: */ + /* Update changed relocation sections and section headers: */ list_for_each_entry(sec, &elf->sections, list) { if (sec->changed) { + if (sec->base && + elf_rebuild_reloc_section(elf, sec)) { + WARN("elf_rebuild_reloc_section"); + return -1; + } + s = elf_getscn(elf->elf, sec->idx); if (!s) { WARN_ELF("elf_getscn"); @@ -1005,6 +1271,7 @@ int elf_write(struct elf *elf) } sec->changed = false; + elf->changed = true; } } diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index e6890cc70a25..a1863eb35fbb 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -55,8 +55,12 @@ struct symbol { unsigned long offset; unsigned int len; struct symbol *pfunc, *cfunc, *alias; - bool uaccess_safe; - bool static_call_tramp; + u8 uaccess_safe : 1; + u8 static_call_tramp : 1; + u8 retpoline_thunk : 1; + u8 return_thunk : 1; + u8 fentry : 1; + u8 kcov : 1; }; struct reloc { @@ -70,7 +74,7 @@ struct reloc { struct symbol *sym; unsigned long offset; unsigned int type; - int addend; + s64 addend; int idx; bool jump_table_start; }; @@ -122,8 +126,13 @@ static inline u32 reloc_hash(struct reloc *reloc) struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); -struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); -void elf_add_reloc(struct elf *elf, struct reloc *reloc); + +int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, + unsigned int type, struct symbol *sym, s64 addend); +int elf_add_reloc_to_insn(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int type, + struct section *insn_sec, unsigned long insn_off); + int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int len, const char *insn); @@ -140,9 +149,6 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len); struct symbol *find_func_containing(struct section *sec, unsigned long offset); -void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, - struct reloc *reloc); -int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); #define for_each_sec(file, sec) \ list_for_each_entry(sec, &file->elf->sections, list) diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index c1819a6f2a18..f69035522693 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -61,6 +61,8 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.retpoline_call_list); + INIT_LIST_HEAD(&file.return_thunk_list); INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index cf004dd60c2b..efd1a66158e7 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -18,6 +18,8 @@ struct objtool_file { struct elf *elf; struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); + struct list_head retpoline_call_list; + struct list_head return_thunk_list; struct list_head static_call_list; struct list_head mcount_loc_list; bool ignore_unreachables, c_file, hints, rodata; @@ -27,7 +29,6 @@ struct objtool_file *objtool_open_read(const char *_objname); int check(struct objtool_file *file); int orc_dump(const char *objname); -int create_orc(struct objtool_file *file); -int create_orc_sections(struct objtool_file *file); +int orc_create(struct objtool_file *file); #endif /* _OBJTOOL_H */ diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 9ce68b385a1b..812b33ed9f65 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -12,205 +12,231 @@ #include "check.h" #include "warn.h" -int create_orc(struct objtool_file *file) +static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, + struct instruction *insn) { - struct instruction *insn; + struct cfi_reg *bp = &cfi->regs[CFI_BP]; - for_each_insn(file, insn) { - struct orc_entry *orc = &insn->orc; - struct cfi_reg *cfa = &insn->cfi.cfa; - struct cfi_reg *bp = &insn->cfi.regs[CFI_BP]; + memset(orc, 0, sizeof(*orc)); - if (!insn->sec->text) - continue; - - orc->end = insn->cfi.end; - - if (cfa->base == CFI_UNDEFINED) { - orc->sp_reg = ORC_REG_UNDEFINED; - continue; - } - - switch (cfa->base) { - case CFI_SP: - orc->sp_reg = ORC_REG_SP; - break; - case CFI_SP_INDIRECT: - orc->sp_reg = ORC_REG_SP_INDIRECT; - break; - case CFI_BP: - orc->sp_reg = ORC_REG_BP; - break; - case CFI_BP_INDIRECT: - orc->sp_reg = ORC_REG_BP_INDIRECT; - break; - case CFI_R10: - orc->sp_reg = ORC_REG_R10; - break; - case CFI_R13: - orc->sp_reg = ORC_REG_R13; - break; - case CFI_DI: - orc->sp_reg = ORC_REG_DI; - break; - case CFI_DX: - orc->sp_reg = ORC_REG_DX; - break; - default: - WARN_FUNC("unknown CFA base reg %d", - insn->sec, insn->offset, cfa->base); - return -1; - } - - switch(bp->base) { - case CFI_UNDEFINED: - orc->bp_reg = ORC_REG_UNDEFINED; - break; - case CFI_CFA: - orc->bp_reg = ORC_REG_PREV_SP; - break; - case CFI_BP: - orc->bp_reg = ORC_REG_BP; - break; - default: - WARN_FUNC("unknown BP base reg %d", - insn->sec, insn->offset, bp->base); - return -1; - } - - orc->sp_offset = cfa->offset; - orc->bp_offset = bp->offset; - orc->type = insn->cfi.type; + if (!cfi) { + orc->end = 0; + orc->sp_reg = ORC_REG_UNDEFINED; + return 0; } + orc->end = cfi->end; + + if (cfi->cfa.base == CFI_UNDEFINED) { + orc->sp_reg = ORC_REG_UNDEFINED; + return 0; + } + + switch (cfi->cfa.base) { + case CFI_SP: + orc->sp_reg = ORC_REG_SP; + break; + case CFI_SP_INDIRECT: + orc->sp_reg = ORC_REG_SP_INDIRECT; + break; + case CFI_BP: + orc->sp_reg = ORC_REG_BP; + break; + case CFI_BP_INDIRECT: + orc->sp_reg = ORC_REG_BP_INDIRECT; + break; + case CFI_R10: + orc->sp_reg = ORC_REG_R10; + break; + case CFI_R13: + orc->sp_reg = ORC_REG_R13; + break; + case CFI_DI: + orc->sp_reg = ORC_REG_DI; + break; + case CFI_DX: + orc->sp_reg = ORC_REG_DX; + break; + default: + WARN_FUNC("unknown CFA base reg %d", + insn->sec, insn->offset, cfi->cfa.base); + return -1; + } + + switch (bp->base) { + case CFI_UNDEFINED: + orc->bp_reg = ORC_REG_UNDEFINED; + break; + case CFI_CFA: + orc->bp_reg = ORC_REG_PREV_SP; + break; + case CFI_BP: + orc->bp_reg = ORC_REG_BP; + break; + default: + WARN_FUNC("unknown BP base reg %d", + insn->sec, insn->offset, bp->base); + return -1; + } + + orc->sp_offset = cfi->cfa.offset; + orc->bp_offset = bp->offset; + orc->type = cfi->type; + return 0; } -static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec, - unsigned int idx, struct section *insn_sec, - unsigned long insn_off, struct orc_entry *o) +static int write_orc_entry(struct elf *elf, struct section *orc_sec, + struct section *ip_sec, unsigned int idx, + struct section *insn_sec, unsigned long insn_off, + struct orc_entry *o) { struct orc_entry *orc; - struct reloc *reloc; /* populate ORC data */ - orc = (struct orc_entry *)u_sec->data->d_buf + idx; + orc = (struct orc_entry *)orc_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); /* populate reloc for ip */ - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); + if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32, + insn_sec, insn_off)) return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - insn_to_reloc_sym_addend(insn_sec, insn_off, reloc); - if (!reloc->sym) { - WARN("missing symbol for insn at offset 0x%lx", - insn_off); - return -1; - } - - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(int); - reloc->sec = ip_relocsec; - - elf_add_reloc(elf, reloc); return 0; } -int create_orc_sections(struct objtool_file *file) -{ - struct instruction *insn, *prev_insn; - struct section *sec, *u_sec, *ip_relocsec; - unsigned int idx; +struct orc_list_entry { + struct list_head list; + struct orc_entry orc; + struct section *insn_sec; + unsigned long insn_off; +}; - struct orc_entry empty = { - .sp_reg = ORC_REG_UNDEFINED, +static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc, + struct section *sec, unsigned long offset) +{ + struct orc_list_entry *entry = malloc(sizeof(*entry)); + + if (!entry) { + WARN("malloc failed"); + return -1; + } + + entry->orc = *orc; + entry->insn_sec = sec; + entry->insn_off = offset; + + list_add_tail(&entry->list, orc_list); + return 0; +} + +static unsigned long alt_group_len(struct alt_group *alt_group) +{ + return alt_group->last_insn->offset + + alt_group->last_insn->len - + alt_group->first_insn->offset; +} + +int orc_create(struct objtool_file *file) +{ + struct section *sec, *orc_sec; + unsigned int nr = 0, idx = 0; + struct orc_list_entry *entry; + struct list_head orc_list; + + struct orc_entry null = { + .sp_reg = ORC_REG_UNDEFINED, .bp_reg = ORC_REG_UNDEFINED, .type = UNWIND_HINT_TYPE_CALL, }; + /* Build a deduplicated list of ORC entries: */ + INIT_LIST_HEAD(&orc_list); + for_each_sec(file, sec) { + struct orc_entry orc, prev_orc = {0}; + struct instruction *insn; + bool empty = true; + + if (!sec->text) + continue; + + sec_for_each_insn(file, sec, insn) { + struct alt_group *alt_group = insn->alt_group; + int i; + + if (!alt_group) { + if (init_orc_entry(&orc, insn->cfi, insn)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; + if (orc_list_add(&orc_list, &orc, sec, + insn->offset)) + return -1; + nr++; + prev_orc = orc; + empty = false; + continue; + } + + /* + * Alternatives can have different stack layout + * possibilities (but they shouldn't conflict). + * Instead of traversing the instructions, use the + * alt_group's flattened byte-offset-addressed CFI + * array. + */ + for (i = 0; i < alt_group_len(alt_group); i++) { + struct cfi_state *cfi = alt_group->cfi[i]; + if (!cfi) + continue; + /* errors are reported on the original insn */ + if (init_orc_entry(&orc, cfi, insn)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; + if (orc_list_add(&orc_list, &orc, insn->sec, + insn->offset + i)) + return -1; + nr++; + prev_orc = orc; + empty = false; + } + + /* Skip to the end of the alt_group */ + insn = alt_group->last_insn; + } + + /* Add a section terminator */ + if (!empty) { + orc_list_add(&orc_list, &null, sec, sec->len); + nr++; + } + } + if (!nr) + return 0; + + /* Create .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections: */ sec = find_section_by_name(file->elf, ".orc_unwind"); if (sec) { WARN("file already has .orc_unwind section, skipping"); return -1; } - - /* count the number of needed orcs */ - idx = 0; - for_each_sec(file, sec) { - if (!sec->text) - continue; - - prev_insn = NULL; - sec_for_each_insn(file, sec, insn) { - if (!prev_insn || - memcmp(&insn->orc, &prev_insn->orc, - sizeof(struct orc_entry))) { - idx++; - } - prev_insn = insn; - } - - /* section terminator */ - if (prev_insn) - idx++; - } - if (!idx) + orc_sec = elf_create_section(file->elf, ".orc_unwind", 0, + sizeof(struct orc_entry), nr); + if (!orc_sec) return -1; - - /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ - sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx); + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr); if (!sec) return -1; - ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!ip_relocsec) - return -1; - - /* create .orc_unwind section */ - u_sec = elf_create_section(file->elf, ".orc_unwind", 0, - sizeof(struct orc_entry), idx); - - /* populate sections */ - idx = 0; - for_each_sec(file, sec) { - if (!sec->text) - continue; - - prev_insn = NULL; - sec_for_each_insn(file, sec, insn) { - if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc, - sizeof(struct orc_entry))) { - - if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, - insn->sec, insn->offset, - &insn->orc)) - return -1; - - idx++; - } - prev_insn = insn; - } - - /* section terminator */ - if (prev_insn) { - if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, - prev_insn->sec, - prev_insn->offset + prev_insn->len, - &empty)) - return -1; - - idx++; - } + /* Write ORC entries to sections: */ + list_for_each_entry(entry, &orc_list, list) { + if (write_orc_entry(file->elf, orc_sec, sec, idx++, + entry->insn_sec, entry->insn_off, + &entry->orc)) + return -1; } - if (elf_rebuild_reloc_section(file->elf, ip_relocsec)) - return -1; - return 0; } diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 1a2420febd08..aff0cee7bac1 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -55,6 +55,13 @@ void __weak arch_handle_alternative(unsigned short feature, struct special_alt * { } +static void reloc_to_sec_off(struct reloc *reloc, struct section **sec, + unsigned long *off) +{ + *sec = reloc->sym->sec; + *off = reloc->sym->offset + reloc->addend; +} + static int get_alt_entry(struct elf *elf, struct special_entry *entry, struct section *sec, int idx, struct special_alt *alt) @@ -87,14 +94,8 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); return -1; } - if (orig_reloc->sym->type != STT_SECTION) { - WARN_FUNC("don't know how to handle non-section reloc symbol %s", - sec, offset + entry->orig, orig_reloc->sym->name); - return -1; - } - alt->orig_sec = orig_reloc->sym->sec; - alt->orig_off = orig_reloc->addend; + reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); if (!entry->group || alt->new_len) { new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); @@ -104,8 +105,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, return -1; } - alt->new_sec = new_reloc->sym->sec; - alt->new_off = (unsigned int)new_reloc->addend; + reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); /* _ASM_EXTABLE_EX hack */ if (alt->new_off >= 0x7ffffff0) @@ -152,7 +152,9 @@ int special_get_alts(struct elf *elf, struct list_head *alts) memset(alt, 0, sizeof(*alt)); ret = get_alt_entry(elf, entry, sec, idx, alt); - if (ret) + if (ret > 0) + continue; + if (ret < 0) return ret; list_add_tail(&alt->list, alts); diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 606a4b5e929f..4bbabaecab14 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -16,11 +16,14 @@ arch/x86/include/asm/emulate_prefix.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk include/linux/static_call_types.h -arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]' -arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]' -arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]' -arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]' " + +SYNC_CHECK_FILES=' +arch/x86/include/asm/inat.h +arch/x86/include/asm/insn.h +arch/x86/lib/inat.c +arch/x86/lib/insn.c +' fi check_2 () { @@ -63,3 +66,9 @@ while read -r file_entry; do done <p.show_details >= 2) { - char tname[14 + 2 * 10 + 1]; + char tname[14 + 2 * 11 + 1]; struct thread_data *td; for (p = 0; p < g->p.nr_proc; p++) { for (t = 0; t < g->p.nr_threads; t++) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index d5bea5d3cd51..fb7d01f3961b 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -918,8 +918,8 @@ percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, double per_left; double per_right; - per_left = PERCENT(left, lcl_hitm); - per_right = PERCENT(right, lcl_hitm); + per_left = PERCENT(left, rmt_hitm); + per_right = PERCENT(right, rmt_hitm); return per_left - per_right; } @@ -2694,9 +2694,7 @@ static int perf_c2c__report(int argc, const char **argv) "the input file to process"), OPT_INCR('N', "node-info", &c2c.node_info, "show extra node info in report (repeat for more info)"), -#ifdef HAVE_SLANG_SUPPORT OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), -#endif OPT_BOOLEAN(0, "stats", &c2c.stats_only, "Display only statistic tables (implies --stdio)"), OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, @@ -2725,6 +2723,10 @@ static int perf_c2c__report(int argc, const char **argv) if (argc) usage_with_options(report_c2c_usage, options); +#ifndef HAVE_SLANG_SUPPORT + c2c.use_stdio = true; +#endif + if (c2c.stats_only) c2c.use_stdio = true; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 15ecb1803fb9..9f085aa09c97 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -75,6 +75,13 @@ include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/unistd.h ' +SYNC_CHECK_FILES=' +arch/x86/include/asm/inat.h +arch/x86/include/asm/insn.h +arch/x86/lib/inat.c +arch/x86/lib/insn.c +' + # These copies are under tools/perf/trace/beauty/ as they are not used to in # building object files only by scripts in tools/perf/trace/beauty/ to generate # tables that then gets included in .c files for things like id->string syscall @@ -129,6 +136,10 @@ for i in $FILES; do check $i -B done +for i in $SYNC_CHECK_FILES; do + check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"' +done + # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' @@ -137,10 +148,6 @@ check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include " -B' -check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"' -check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' -check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' -check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index c679a79aef51..1f20f587e053 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -579,7 +579,7 @@ static int json_events(const char *fn, } else if (json_streq(map, field, "ExtSel")) { char *code = NULL; addfield(map, &code, "", "", val); - eventcode |= strtoul(code, NULL, 0) << 21; + eventcode |= strtoul(code, NULL, 0) << 8; free(code); } else if (json_streq(map, field, "EventName")) { addfield(map, &je.name, "", "", val); diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 75947ef6bc17..5b52ffedf0d5 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -3,6 +3,7 @@ #define __PERF_DATA_H #include +#include enum perf_data_mode { PERF_DATA_MODE_WRITE, diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 94809aed8b44..1cab29d45bfb 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -232,6 +232,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, return NULL; } +static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) +{ + size_t i, phdrnum; + u64 sz; + + if (elf_getphdrnum(elf, &phdrnum)) + return -1; + + for (i = 0; i < phdrnum; i++) { + if (gelf_getphdr(elf, i, phdr) == NULL) + return -1; + + if (phdr->p_type != PT_LOAD) + continue; + + sz = max(phdr->p_memsz, phdr->p_filesz); + if (!sz) + continue; + + if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz)) + return 0; + } + + /* Not found any valid program header */ + return -1; +} + static bool want_demangle(bool is_kernel_sym) { return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; @@ -1181,6 +1208,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, sym.st_value); used_opd = true; } + /* * When loading symbols in a data mapping, ABS symbols (which * has a value of SHN_ABS in its st_shndx) failed at @@ -1217,11 +1245,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, goto out_elf_end; } else if ((used_opd && runtime_ss->adjust_symbols) || (!used_opd && syms_ss->adjust_symbols)) { + GElf_Phdr phdr; + + if (elf_read_program_header(syms_ss->elf, + (u64)sym.st_value, &phdr)) { + pr_warning("%s: failed to find program header for " + "symbol: %s st_value: %#" PRIx64 "\n", + __func__, elf_name, (u64)sym.st_value); + continue; + } pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, - (u64)sym.st_value, (u64)shdr.sh_addr, - (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", + __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, + (u64)phdr.p_offset); + sym.st_value -= phdr.p_vaddr - phdr.p_offset; } demangled = demangle_sym(dso, kmodule, elf_name); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 424ed19a9d54..ef65f7eed1ec 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4189,6 +4189,7 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) case INTEL_FAM6_HASWELL_X: /* HSX */ case INTEL_FAM6_BROADWELL_X: /* BDX */ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ return (rapl_dram_energy_units = 15.3 / 1000000); default: return (rapl_energy_units); diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index af87118e748e..e8b5bf707cd4 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -1,9 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ +#define _GNU_SOURCE #include #include #include +#include #include #include #include @@ -21,6 +23,7 @@ enum bpf_linum_array_idx { EGRESS_LINUM_IDX, INGRESS_LINUM_IDX, + READ_SK_DST_PORT_LINUM_IDX, __NR_BPF_LINUM_ARRAY_IDX, }; @@ -43,8 +46,16 @@ static __u64 child_cg_id; static int linum_map_fd; static __u32 duration; -static __u32 egress_linum_idx = EGRESS_LINUM_IDX; -static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; +static bool create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return false; + + if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) + return false; + + return true; +} static void print_sk(const struct bpf_sock *sk, const char *prefix) { @@ -92,19 +103,24 @@ static void check_result(void) { struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; struct bpf_sock srv_sk, cli_sk, listen_sk; - __u32 ingress_linum, egress_linum; + __u32 idx, ingress_linum, egress_linum, linum; int err; - err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, - &egress_linum); + idx = EGRESS_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum); CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); - err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, - &ingress_linum); + idx = INGRESS_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum); CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); + idx = READ_SK_DST_PORT_LINUM_IDX; + err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum); + ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)"); + ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line"); + memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp)); memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk)); @@ -263,7 +279,7 @@ static void test(void) char buf[DATA_LEN]; /* Prepare listen_fd */ - listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0); /* start_server() has logged the error details */ if (CHECK_FAIL(listen_fd == -1)) goto done; @@ -331,8 +347,12 @@ static void test(void) void test_sock_fields(void) { - struct bpf_link *egress_link = NULL, *ingress_link = NULL; int parent_cg_fd = -1, child_cg_fd = -1; + struct bpf_link *link; + + /* Use a dedicated netns to have a fixed listen port */ + if (!create_netns()) + return; /* Create a cgroup, get fd, and join it */ parent_cg_fd = test__join_cgroup(PARENT_CGROUP); @@ -353,17 +373,20 @@ void test_sock_fields(void) if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) goto done; - egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, - child_cg_fd); - if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n", - PTR_ERR(egress_link))) + link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)")) goto done; + skel->links.egress_read_sock_fields = link; - ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, - child_cg_fd); - if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n", - PTR_ERR(ingress_link))) + link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)")) goto done; + skel->links.ingress_read_sock_fields = link; + + link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd); + if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port")) + goto done; + skel->links.read_sk_dst_port = link; linum_map_fd = bpf_map__fd(skel->maps.linum_map); sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt); @@ -372,8 +395,7 @@ void test_sock_fields(void) test(); done: - bpf_link__destroy(egress_link); - bpf_link__destroy(ingress_link); + test_sock_fields__detach(skel); test_sock_fields__destroy(skel); if (child_cg_fd != -1) close(child_cg_fd); diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 31975c96e2c9..fe43556e1a61 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -94,7 +94,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int); typedef char * (*fn_ptr_arr1_t[10])(int **); -typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int)); +typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int)); struct struct_w_typedefs { int_t a; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 7967348b11af..43b31aa1fcf7 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -12,6 +12,7 @@ enum bpf_linum_array_idx { EGRESS_LINUM_IDX, INGRESS_LINUM_IDX, + READ_SK_DST_PORT_LINUM_IDX, __NR_BPF_LINUM_ARRAY_IDX, }; @@ -250,4 +251,48 @@ int ingress_read_sock_fields(struct __sk_buff *skb) return CG_OK; } +static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk) +{ + __u32 *word = (__u32 *)&sk->dst_port; + return word[0] == bpf_htonl(0xcafe0000); +} + +static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) +{ + __u16 *half = (__u16 *)&sk->dst_port; + return half[0] == bpf_htons(0xcafe); +} + +static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk) +{ + __u8 *byte = (__u8 *)&sk->dst_port; + return byte[0] == 0xca && byte[1] == 0xfe; +} + +SEC("cgroup_skb/egress") +int read_sk_dst_port(struct __sk_buff *skb) +{ + __u32 linum, linum_idx; + struct bpf_sock *sk; + + linum_idx = READ_SK_DST_PORT_LINUM_IDX; + + sk = skb->sk; + if (!sk) + RET_LOG(); + + /* Ignore everything but the SYN from the client socket */ + if (sk->state != BPF_TCP_SYN_SENT) + return CG_OK; + + if (!sk_dst_port__load_word(sk)) + RET_LOG(); + if (!sk_dst_port__load_half(sk)) + RET_LOG(); + if (!sk_dst_port__load_byte(sk)) + RET_LOG(); + + return CG_OK; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a4c55fcb0e7b..0fb92d9a319b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -100,7 +100,7 @@ struct bpf_test { enum bpf_prog_type prog_type; uint8_t flags; void (*fill_helper)(struct bpf_test *self); - uint8_t runs; + int runs; #define bpf_testdata_struct_t \ struct { \ uint32_t retval, retval_unpriv; \ @@ -1054,7 +1054,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, run_errs = 0; run_successes = 0; - if (!alignment_prevented_execution && fd_prog >= 0) { + if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) { uint32_t expected_val; int i; diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c index 2ad5f974451c..fd3b62a084b9 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c @@ -239,6 +239,7 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SK_LOOKUP, .expected_attach_type = BPF_SK_LOOKUP, + .runs = -1, }, /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */ { diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index ce13ece08d51..8c224eac93df 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -121,7 +121,25 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [narrow load]", + "sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [half load]", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -139,7 +157,7 @@ .result = ACCEPT, }, { - "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]", + "sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), @@ -149,7 +167,64 @@ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load]", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, +}, +{ + "sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = REJECT, + .errstr = "invalid sock access", +}, +{ + "sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh index 15d9d5896394..3c9c4554d5f6 100755 --- a/tools/testing/selftests/cgroup/test_stress.sh +++ b/tools/testing/selftests/cgroup/test_stress.sh @@ -1,4 +1,4 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -./with_stress.sh -s subsys -s fork ./test_core +./with_stress.sh -s subsys -s fork ${OUTPUT:-.}/test_core diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index 2f37b90ee1a9..f600311fdc6a 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm) void ucall(uint64_t cmd, int nargs, ...) { - struct ucall uc = { - .cmd = cmd, - }; + struct ucall uc = {}; va_list va; int i; + WRITE_ONCE(uc.cmd, cmd); nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; va_start(va, nargs); for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); + WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); va_end(va); - *ucall_exit_mmio_addr = (vm_vaddr_t)&uc; + WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); } uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index ace976d89125..4a11ea2261cb 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -794,10 +794,16 @@ ipv4_ping() setup set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null ipv4_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_novrf log_subsection "With VRF" setup "yes" ipv4_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv4_ping_vrf } ################################################################################ @@ -2261,10 +2267,16 @@ ipv6_ping() log_subsection "No VRF" setup ipv6_ping_novrf + setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_novrf log_subsection "With VRF" setup "yes" ipv6_ping_vrf + setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + ipv6_ping_vrf } ################################################################################ diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index be6fa808d219..54020d05a62b 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1063,6 +1063,7 @@ learning_test() # FDB entry was installed. bridge link set dev $br_port1 flood off + ip link set $host1_if promisc on tc qdisc add dev $host1_if ingress tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -1073,7 +1074,7 @@ learning_test() tc -j -s filter show dev $host1_if ingress \ | jq -e ".[] | select(.options.handle == 101) \ | select(.options.actions[0].stats.packets == 1)" &> /dev/null - check_fail $? "Packet reached second host when should not" + check_fail $? "Packet reached first host when should not" $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q sleep 1 @@ -1112,6 +1113,7 @@ learning_test() tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host1_if ingress + ip link set $host1_if promisc off bridge link set dev $br_port1 flood on @@ -1129,6 +1131,7 @@ flood_test_do() # Add an ACL on `host2_if` which will tell us whether the packet # was flooded to it or not. + ip link set $host2_if promisc on tc qdisc add dev $host2_if ingress tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -1146,6 +1149,7 @@ flood_test_do() tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host2_if ingress + ip link set $host2_if promisc off return $err } diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 160f9cccdfb7..eb09acdcb3ff 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -35,6 +35,8 @@ ALL_TESTS=" police_shared_test police_rx_mirror_test police_tx_mirror_test + police_mtu_rx_test + police_mtu_tx_test " NUM_NETIFS=6 source tc_common.sh @@ -290,6 +292,56 @@ police_tx_mirror_test() police_mirror_common_test $rp2 egress "police tx and mirror" } +police_mtu_common_test() { + RET=0 + + local test_name=$1; shift + local dev=$1; shift + local direction=$1; shift + + tc filter add dev $dev $direction protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police mtu 1042 conform-exceed drop/ok + + # to count "conform" packets + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action drop + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=54321 -p 1001 -c 10 -q + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=54321 -p 1000 -c 3 -q + + tc_check_packets "dev $dev $direction" 101 13 + check_err $? "wrong packet counter" + + # "exceed" packets + local overlimits_t0=$(tc_rule_stats_get ${dev} 1 ${direction} .overlimits) + test ${overlimits_t0} = 10 + check_err $? "wrong overlimits, expected 10 got ${overlimits_t0}" + + # "conform" packets + tc_check_packets "dev $h2 ingress" 101 3 + check_err $? "forwarding error" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $dev $direction protocol ip pref 1 handle 101 flower + + log_test "$test_name" +} + +police_mtu_rx_test() +{ + police_mtu_common_test "police mtu (rx)" $rp1 ingress +} + +police_mtu_tx_test() +{ + police_mtu_common_test "police mtu (tx)" $rp2 egress +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 80b5d352702e..dc932fd65363 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -120,7 +120,7 @@ run_all() { run_udp "${ipv4_args}" echo "ipv6" - run_tcp "${ipv4_args}" + run_tcp "${ipv6_args}" run_udp "${ipv6_args}" } diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index b5eef5ffb58e..af3461cb5c40 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -31,7 +31,7 @@ BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" - ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh + ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh pktgen/pktgen_bench_xmit_mode_netif_receive.sh" # Definition of set types: diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index d7e07f4c3d7f..4e15e8167310 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -374,6 +374,45 @@ EOF return $lret } +test_local_dnat_portonly() +{ + local family=$1 + local daddr=$2 + local lret=0 + local sr_s + local sr_r + +ip netns exec "$ns0" nft -f /dev/stdin < #include +#include "../kselftest.h" #include "rseq.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - struct percpu_lock_entry { intptr_t v; } __attribute__((aligned(128))); @@ -168,7 +167,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, for (;;) { struct percpu_list_node *head; intptr_t *targetptr, expectnot, *load; - off_t offset; + long offset; int ret, cpu; cpu = rseq_cpu_start(); diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h new file mode 100644 index 000000000000..876eb6a7f75b --- /dev/null +++ b/tools/testing/selftests/rseq/compiler.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq/compiler.h + * + * Work-around asm goto compiler bugs. + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef RSEQ_COMPILER_H +#define RSEQ_COMPILER_H + +/* + * gcc prior to 4.8.2 miscompiles asm goto. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * gcc prior to 8.1.0 miscompiles asm goto at O1. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908 + * + * clang prior to version 13.0.1 miscompiles asm goto at O2. + * https://github.com/llvm/llvm-project/issues/52735 + * + * Work around these issues by adding a volatile inline asm with + * memory clobber in the fallthrough after the asm goto and at each + * label target. Emit this for all compilers in case other similar + * issues are found in the future. + */ +#define rseq_after_asm_goto() asm volatile ("" : : : "memory") + +#endif /* RSEQ_COMPILER_H_ */ diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 384589095864..e29ecc7158e8 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -161,7 +161,7 @@ unsigned int yield_mod_cnt, nr_abort; " cbnz " INJECT_ASM_REG ", 222b\n" \ "333:\n" -#elif __PPC__ +#elif defined(__PPC__) #define RSEQ_INJECT_INPUT \ , [loop_cnt_1]"m"(loop_cnt[1]) \ @@ -368,9 +368,7 @@ void *test_percpu_spinlock_thread(void *arg) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { - int cpu = rseq_cpu_start(); - - cpu = rseq_this_cpu_lock(&data->lock); + int cpu = rseq_this_cpu_lock(&data->lock); data->c[cpu].count++; rseq_percpu_unlock(&data->lock, cpu); #ifndef BENCHMARK @@ -551,7 +549,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, for (;;) { struct percpu_list_node *head; intptr_t *targetptr, expectnot, *load; - off_t offset; + long offset; int ret; cpu = rseq_cpu_start(); diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h new file mode 100644 index 000000000000..a8c44d9af71f --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-abi.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _RSEQ_ABI_H +#define _RSEQ_ABI_H + +/* + * rseq-abi.h + * + * Restartable sequences system call API + * + * Copyright (c) 2015-2022 Mathieu Desnoyers + */ + +#include +#include + +enum rseq_abi_cpu_id_state { + RSEQ_ABI_CPU_ID_UNINITIALIZED = -1, + RSEQ_ABI_CPU_ID_REGISTRATION_FAILED = -2, +}; + +enum rseq_abi_flags { + RSEQ_ABI_FLAG_UNREGISTER = (1 << 0), +}; + +enum rseq_abi_cs_flags_bit { + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, +}; + +enum rseq_abi_cs_flags { + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), +}; + +/* + * struct rseq_abi_cs is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. It is usually declared as + * link-time constant data. + */ +struct rseq_abi_cs { + /* Version of this structure. */ + __u32 version; + /* enum rseq_abi_cs_flags */ + __u32 flags; + __u64 start_ip; + /* Offset from start_ip. */ + __u64 post_commit_offset; + __u64 abort_ip; +} __attribute__((aligned(4 * sizeof(__u64)))); + +/* + * struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. + * + * A single struct rseq_abi per thread is allowed. + */ +struct rseq_abi { + /* + * Restartable sequences cpu_id_start field. Updated by the + * kernel. Read by user-space with single-copy atomicity + * semantics. This field should only be read by the thread which + * registered this data structure. Aligned on 32-bit. Always + * contains a value in the range of possible CPUs, although the + * value may not be the actual current CPU (e.g. if rseq is not + * initialized). This CPU number value should always be compared + * against the value of the cpu_id field before performing a rseq + * commit or returning a value read from a data structure indexed + * using the cpu_id_start value. + */ + __u32 cpu_id_start; + /* + * Restartable sequences cpu_id field. Updated by the kernel. + * Read by user-space with single-copy atomicity semantics. This + * field should only be read by the thread which registered this + * data structure. Aligned on 32-bit. Values + * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED + * have a special semantic: the former means "rseq uninitialized", + * and latter means "rseq initialization failed". This value is + * meant to be read within rseq critical sections and compared + * with the cpu_id_start value previously read, before performing + * the commit instruction, or read and compared with the + * cpu_id_start value before returning a value loaded from a data + * structure indexed using the cpu_id_start value. + */ + __u32 cpu_id; + /* + * Restartable sequences rseq_cs field. + * + * Contains NULL when no critical section is active for the current + * thread, or holds a pointer to the currently active struct rseq_cs. + * + * Updated by user-space, which sets the address of the currently + * active rseq_cs at the beginning of assembly instruction sequence + * block, and set to NULL by the kernel when it restarts an assembly + * instruction sequence block, as well as when the kernel detects that + * it is preempting or delivering a signal outside of the range + * targeted by the rseq_cs. Also needs to be set to NULL by user-space + * before reclaiming memory that contains the targeted struct rseq_cs. + * + * Read and set by the kernel. Set by user-space with single-copy + * atomicity semantics. This field should only be updated by the + * thread which registered this data structure. Aligned on 64-bit. + */ + union { + __u64 ptr64; + + /* + * The "arch" field provides architecture accessor for + * the ptr field based on architecture pointer size and + * endianness. + */ + struct { +#ifdef __LP64__ + __u64 ptr; +#elif defined(__BYTE_ORDER) ? (__BYTE_ORDER == __BIG_ENDIAN) : defined(__BIG_ENDIAN) + __u32 padding; /* Initialized to zero. */ + __u32 ptr; +#else + __u32 ptr; + __u32 padding; /* Initialized to zero. */ +#endif + } arch; + } rseq_cs; + + /* + * Restartable sequences flags field. + * + * This field should only be updated by the thread which + * registered this data structure. Read by the kernel. + * Mainly used for single-stepping through rseq critical sections + * with debuggers. + * + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT + * Inhibit instruction sequence block restart on preemption + * for this thread. + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL + * Inhibit instruction sequence block restart on signal + * delivery for this thread. + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE + * Inhibit instruction sequence block restart on migration for + * this thread. + */ + __u32 flags; +} __attribute__((aligned(4 * sizeof(__u64)))); + +#endif /* _RSEQ_ABI_H */ diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 5943c816c07c..893a11eca9d5 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -147,14 +147,11 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") - static inline __attribute__((always_inline)) int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -185,8 +182,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -198,30 +195,31 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -255,8 +253,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -270,19 +268,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -292,7 +292,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ #ifdef RSEQ_COMPARE_TWICE @@ -316,8 +315,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -328,14 +327,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -347,7 +347,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -381,8 +380,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -398,19 +397,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -422,7 +423,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -457,8 +457,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -474,19 +474,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -498,7 +500,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -537,8 +538,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -554,21 +555,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -582,7 +586,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -657,8 +660,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -678,21 +681,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -706,7 +709,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -782,8 +784,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -803,21 +805,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index 200dae9e4208..cbe190a4d005 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -230,8 +230,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -242,24 +242,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -287,8 +291,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expectnot] "r" (expectnot), [load] "Qo" (*load), @@ -300,16 +304,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -337,8 +346,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [count] "r" (count) RSEQ_INJECT_INPUT @@ -348,12 +357,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -388,8 +400,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -402,17 +414,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -447,8 +463,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -461,17 +477,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -508,8 +528,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expect] "r" (expect), [v2] "Qo" (*v2), @@ -522,19 +542,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -569,8 +594,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -584,17 +609,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -629,8 +658,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -644,17 +673,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h new file mode 100644 index 000000000000..38c584661571 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-generic-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_GENERIC_THREAD_POINTER +#define _RSEQ_GENERIC_THREAD_POINTER + +#ifdef __cplusplus +extern "C" { +#endif + +/* Use gcc builtin thread pointer. */ +static inline void *rseq_thread_pointer(void) +{ + return __builtin_thread_pointer(); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h index e989e7c14b09..878739fae2fd 100644 --- a/tools/testing/selftests/rseq/rseq-mips.h +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -154,14 +154,11 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") - static inline __attribute__((always_inline)) int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -190,8 +187,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -203,14 +200,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -222,11 +216,10 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -258,8 +251,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -273,14 +266,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -295,7 +285,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ #ifdef RSEQ_COMPARE_TWICE @@ -319,8 +308,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -331,10 +320,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE @@ -350,7 +337,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -382,8 +368,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -399,14 +385,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -423,7 +406,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -456,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -473,14 +455,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -497,7 +476,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -532,8 +510,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -549,14 +527,11 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -577,7 +552,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -649,8 +623,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -670,21 +644,16 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } @@ -698,7 +667,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -771,8 +739,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -792,21 +760,16 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h new file mode 100644 index 000000000000..263eee84fb76 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-ppc-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_PPC_THREAD_POINTER +#define _RSEQ_PPC_THREAD_POINTER + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void *rseq_thread_pointer(void) +{ +#ifdef __powerpc64__ + register void *__result asm ("r13"); +#else + register void *__result asm ("r2"); +#endif + asm ("" : "=r" (__result)); + return __result; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index 76be90196fe4..bab8e0b9fb11 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -47,10 +47,13 @@ do { \ #ifdef __PPC64__ -#define STORE_WORD "std " -#define LOAD_WORD "ld " -#define LOADX_WORD "ldx " -#define CMP_WORD "cmpd " +#define RSEQ_STORE_LONG(arg) "std%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_STORE_INT(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_LOAD_LONG(arg) "ld%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOAD_INT(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOADX_LONG "ldx " /* From base register ("b" constraint) */ +#define RSEQ_CMP_LONG "cmpd " +#define RSEQ_CMP_LONG_INT "cmpdi " #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ @@ -89,10 +92,13 @@ do { \ #else /* #ifdef __PPC64__ */ -#define STORE_WORD "stw " -#define LOAD_WORD "lwz " -#define LOADX_WORD "lwzx " -#define CMP_WORD "cmpw " +#define RSEQ_STORE_LONG(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_STORE_INT(arg) RSEQ_STORE_LONG(arg) /* To memory ("m" constraint) */ +#define RSEQ_LOAD_LONG(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOAD_INT(arg) RSEQ_LOAD_LONG(arg) /* From memory ("m" constraint) */ +#define RSEQ_LOADX_LONG "lwzx " /* From base register ("b" constraint) */ +#define RSEQ_CMP_LONG "cmpw " +#define RSEQ_CMP_LONG_INT "cmpwi " #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ @@ -125,7 +131,7 @@ do { \ RSEQ_INJECT_ASM(1) \ "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \ "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \ - "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ + RSEQ_STORE_INT(rseq_cs) "%%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ __rseq_str(label) ":\n\t" #endif /* #ifdef __PPC64__ */ @@ -136,7 +142,7 @@ do { \ #define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ RSEQ_INJECT_ASM(2) \ - "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ + RSEQ_LOAD_INT(current_cpu_id) "%%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \ "bne- cr7, " __rseq_str(label) "\n\t" @@ -153,25 +159,25 @@ do { \ * RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7) */ #define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ - CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ "bne- cr7, " __rseq_str(label) "\n\t" #define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ - CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \ + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \ "beq- cr7, " __rseq_str(label) "\n\t" #define RSEQ_ASM_OP_STORE(value, var) \ - STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" + RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" /* Load @var to r17 */ #define RSEQ_ASM_OP_R_LOAD(var) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" /* Store r17 to @var */ #define RSEQ_ASM_OP_R_STORE(var) \ - STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" /* Add @count to r17 */ #define RSEQ_ASM_OP_R_ADD(count) \ @@ -179,11 +185,11 @@ do { \ /* Load (r17 + voffp) to r17 */ #define RSEQ_ASM_OP_R_LOADX(voffp) \ - LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" + RSEQ_LOADX_LONG "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" /* TODO: implement a faster memcpy. */ #define RSEQ_ASM_OP_R_MEMCPY() \ - "cmpdi %%r19, 0\n\t" \ + RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \ "beq 333f\n\t" \ "addi %%r20, %%r20, -1\n\t" \ "addi %%r21, %%r21, -1\n\t" \ @@ -191,16 +197,16 @@ do { \ "lbzu %%r18, 1(%%r20)\n\t" \ "stbu %%r18, 1(%%r21)\n\t" \ "addi %%r19, %%r19, -1\n\t" \ - "cmpdi %%r19, 0\n\t" \ + RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \ "bne 222b\n\t" \ "333:\n\t" \ #define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ - STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ __rseq_str(post_commit_label) ":\n\t" #define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ - STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ + RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ __rseq_str(post_commit_label) ":\n\t" static inline __attribute__((always_inline)) @@ -235,8 +241,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -248,23 +254,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -301,8 +312,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -316,16 +327,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -359,8 +375,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -372,12 +388,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -419,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -436,16 +455,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -489,8 +513,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -506,16 +530,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -560,8 +589,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -577,18 +606,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -635,8 +670,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -653,16 +688,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -711,8 +751,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -729,23 +769,23 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } -#undef STORE_WORD -#undef LOAD_WORD -#undef LOADX_WORD -#undef CMP_WORD - #endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 8ef94ad1cbb4..4e6dc5f0cb42 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -165,8 +165,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -178,16 +178,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -198,7 +203,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -233,8 +238,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -248,16 +253,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -288,8 +298,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -301,12 +311,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -347,8 +360,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -364,16 +377,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -426,8 +444,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -443,18 +461,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -534,8 +558,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -555,16 +579,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h index 72750b5905a9..7b53dac1fcdd 100644 --- a/tools/testing/selftests/rseq/rseq-skip.h +++ b/tools/testing/selftests/rseq/rseq-skip.h @@ -13,7 +13,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { return -1; } diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h new file mode 100644 index 000000000000..977c25d758b2 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_THREAD_POINTER +#define _RSEQ_THREAD_POINTER + +#if defined(__x86_64__) || defined(__i386__) +#include "rseq-x86-thread-pointer.h" +#elif defined(__PPC__) +#include "rseq-ppc-thread-pointer.h" +#else +#include "rseq-generic-thread-pointer.h" +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h new file mode 100644 index 000000000000..d3133587d996 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-x86-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers + */ + +#ifndef _RSEQ_X86_THREAD_POINTER +#define _RSEQ_X86_THREAD_POINTER + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if __GNUC_PREREQ (11, 1) +static inline void *rseq_thread_pointer(void) +{ + return __builtin_thread_pointer(); +} +#else +static inline void *rseq_thread_pointer(void) +{ + void *__result; + +# ifdef __x86_64__ + __asm__ ("mov %%fs:0, %0" : "=r" (__result)); +# else + __asm__ ("mov %%gs:0, %0" : "=r" (__result)); +# endif + return __result; +} +#endif /* !GCC 11 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index 640411518e46..bd01dc41ca13 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -28,6 +28,8 @@ #ifdef __x86_64__ +#define RSEQ_ASM_TP_SEGMENT %%fs + #define rseq_smp_mb() \ __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") #define rseq_smp_rmb() rseq_barrier() @@ -123,14 +125,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -141,7 +143,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -152,16 +154,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -172,7 +179,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -184,15 +191,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" "je %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -207,7 +214,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -220,16 +227,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -245,11 +257,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addq %[count], %[v]\n\t" @@ -258,7 +270,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [count] "er" (count) @@ -269,12 +281,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -286,7 +301,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) * *pval += inc; */ static inline __attribute__((always_inline)) -int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu) { RSEQ_INJECT_C(9) @@ -296,11 +311,11 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* get p+v */ "movq %[ptr], %%rbx\n\t" @@ -314,7 +329,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [ptr] "m" (*ptr), [off] "er" (off), @@ -351,14 +366,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -372,7 +387,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -387,16 +402,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -426,8 +446,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" @@ -436,7 +456,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" "cmpq %[v2], %[expect2]\n\t" @@ -449,7 +469,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -464,18 +484,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -500,14 +526,14 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "movq %[dst], %[rseq_scratch1]\n\t" "movq %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "cmpq %[v], %[expect]\n\t" "jnz 7f\n\t" #endif @@ -555,7 +581,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -574,16 +600,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -600,7 +631,9 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif /* !RSEQ_SKIP_FASTPATH */ -#elif __i386__ +#elif defined(__i386__) + +#define RSEQ_ASM_TP_SEGMENT %%gs #define rseq_smp_mb() \ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") @@ -701,14 +734,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -719,7 +752,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -730,16 +763,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -750,7 +788,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -762,15 +800,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" "je %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -785,7 +823,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -798,16 +836,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -823,11 +866,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addl %[count], %[v]\n\t" @@ -836,7 +879,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [count] "ir" (count) @@ -847,12 +890,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -872,14 +918,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -894,7 +940,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "m" (newv2), @@ -909,16 +955,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -938,15 +989,15 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" "jnz %l[error2]\n\t" @@ -962,7 +1013,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -977,16 +1028,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif @@ -1008,8 +1064,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" @@ -1018,7 +1074,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" "cmpl %[expect2], %[v2]\n\t" @@ -1032,7 +1088,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -1047,18 +1103,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -1084,15 +1146,15 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "movl %[dst], %[rseq_scratch1]\n\t" "movl %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 7f\n\t" @@ -1142,7 +1204,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "m" (expect), @@ -1161,16 +1223,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -1196,15 +1263,15 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "movl %[dst], %[rseq_scratch1]\n\t" "movl %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 7f\n\t" @@ -1255,7 +1322,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "m" (expect), @@ -1274,16 +1341,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 7159eb777fd3..986b9458efb2 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -26,104 +26,114 @@ #include #include #include +#include +#include +#include "../kselftest.h" #include "rseq.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +static const ptrdiff_t *libc_rseq_offset_p; +static const unsigned int *libc_rseq_size_p; +static const unsigned int *libc_rseq_flags_p; -__thread volatile struct rseq __rseq_abi = { - .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, -}; +/* Offset from the thread pointer to the rseq area. */ +ptrdiff_t rseq_offset; -/* - * Shared with other libraries. This library may take rseq ownership if it is - * still 0 when executing the library constructor. Set to 1 by library - * constructor when handling rseq. Set to 0 in destructor if handling rseq. - */ -int __rseq_handled; +/* Size of the registered rseq area. 0 if the registration was + unsuccessful. */ +unsigned int rseq_size = -1U; + +/* Flags used during rseq registration. */ +unsigned int rseq_flags; -/* Whether this library have ownership of rseq registration. */ static int rseq_ownership; -static __thread volatile uint32_t __rseq_refcount; +static +__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, +}; -static void signal_off_save(sigset_t *oldset) -{ - sigset_t set; - int ret; - - sigfillset(&set); - ret = pthread_sigmask(SIG_BLOCK, &set, oldset); - if (ret) - abort(); -} - -static void signal_restore(sigset_t oldset) -{ - int ret; - - ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); - if (ret) - abort(); -} - -static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, +static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) { return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } +int rseq_available(void) +{ + int rc; + + rc = sys_rseq(NULL, 0, 0, 0); + if (rc != -1) + abort(); + switch (errno) { + case ENOSYS: + return 0; + case EINVAL: + return 1; + default: + abort(); + } +} + int rseq_register_current_thread(void) { - int rc, ret = 0; - sigset_t oldset; + int rc; - if (!rseq_ownership) + if (!rseq_ownership) { + /* Treat libc's ownership as a successful registration. */ return 0; - signal_off_save(&oldset); - if (__rseq_refcount == UINT_MAX) { - ret = -1; - goto end; } - if (__rseq_refcount++) - goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); - if (!rc) { - assert(rseq_current_cpu_raw() >= 0); - goto end; - } - if (errno != EBUSY) - __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED; - ret = -1; - __rseq_refcount--; -end: - signal_restore(oldset); - return ret; + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG); + if (rc) + return -1; + assert(rseq_current_cpu_raw() >= 0); + return 0; } int rseq_unregister_current_thread(void) { - int rc, ret = 0; - sigset_t oldset; + int rc; - if (!rseq_ownership) + if (!rseq_ownership) { + /* Treat libc's ownership as a successful unregistration. */ return 0; - signal_off_save(&oldset); - if (!__rseq_refcount) { - ret = -1; - goto end; } - if (--__rseq_refcount) - goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), - RSEQ_FLAG_UNREGISTER, RSEQ_SIG); - if (!rc) - goto end; - __rseq_refcount = 1; - ret = -1; -end: - signal_restore(oldset); - return ret; + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + if (rc) + return -1; + return 0; +} + +static __attribute__((constructor)) +void rseq_init(void) +{ + libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); + libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); + libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) { + /* rseq registration owned by glibc */ + rseq_offset = *libc_rseq_offset_p; + rseq_size = *libc_rseq_size_p; + rseq_flags = *libc_rseq_flags_p; + return; + } + if (!rseq_available()) + return; + rseq_ownership = 1; + rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + rseq_size = sizeof(struct rseq_abi); + rseq_flags = 0; +} + +static __attribute__((destructor)) +void rseq_exit(void) +{ + if (!rseq_ownership) + return; + rseq_offset = 0; + rseq_size = -1U; + rseq_ownership = 0; } int32_t rseq_fallback_current_cpu(void) @@ -137,20 +147,3 @@ int32_t rseq_fallback_current_cpu(void) } return cpu; } - -void __attribute__((constructor)) rseq_init(void) -{ - /* Check whether rseq is handled by another library. */ - if (__rseq_handled) - return; - __rseq_handled = 1; - rseq_ownership = 1; -} - -void __attribute__((destructor)) rseq_fini(void) -{ - if (!rseq_ownership) - return; - __rseq_handled = 0; - rseq_ownership = 0; -} diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 3f63eb362b92..9d850b290c2e 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -16,7 +16,9 @@ #include #include #include -#include +#include +#include "rseq-abi.h" +#include "compiler.h" /* * Empty code injection macros, override when testing. @@ -43,8 +45,20 @@ #define RSEQ_INJECT_FAILED #endif -extern __thread volatile struct rseq __rseq_abi; -extern int __rseq_handled; +#include "rseq-thread-pointer.h" + +/* Offset from the thread pointer to the rseq area. */ +extern ptrdiff_t rseq_offset; +/* Size of the registered rseq area. 0 if the registration was + unsuccessful. */ +extern unsigned int rseq_size; +/* Flags used during rseq registration. */ +extern unsigned int rseq_flags; + +static inline struct rseq_abi *rseq_get_abi(void) +{ + return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset); +} #define rseq_likely(x) __builtin_expect(!!(x), 1) #define rseq_unlikely(x) __builtin_expect(!!(x), 0) @@ -108,7 +122,7 @@ int32_t rseq_fallback_current_cpu(void); */ static inline int32_t rseq_current_cpu_raw(void) { - return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id); + return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id); } /* @@ -124,7 +138,7 @@ static inline int32_t rseq_current_cpu_raw(void) */ static inline uint32_t rseq_cpu_start(void) { - return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); + return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start); } static inline uint32_t rseq_current_cpu(void) @@ -139,11 +153,7 @@ static inline uint32_t rseq_current_cpu(void) static inline void rseq_clear_rseq_cs(void) { -#ifdef __LP64__ - __rseq_abi.rseq_cs.ptr = 0; -#else - __rseq_abi.rseq_cs.ptr.ptr32 = 0; -#endif + RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0); } /* diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index 0d7bbe49359d..1b25cc7c64bb 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -5,7 +5,8 @@ virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h -LDFLAGS += -lpthread +CFLAGS += -pthread +LDFLAGS += -pthread vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9cd8ca2d8bc1..c5dbac10c372 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3644,8 +3644,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, kvm_put_kvm_no_destroy(kvm); mutex_lock(&kvm->lock); list_del(&dev->vm_node); + if (ops->release) + ops->release(dev); mutex_unlock(&kvm->lock); - ops->destroy(dev); + if (ops->destroy) + ops->destroy(dev); return ret; }