This is the 6.1.27 stable release

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmRO+SoACgkQONu9yGCS
 aT4zTA//UkIj0wfAZ3s3WAx8X0yFZKh5d/Fr0l50gbFt2Lvd4xA1vLNJVZb3rUGF
 +FWxpkAbvIuQcYdqHx4JjUuvnTykNGi5JPbXt4lXQpH8/EgsjY1PKSrG3Z9aRTpO
 4HMo8wzIEYuuJWoOlKtXQQirKnrmDkCgPRZmS9wIT2CwtPbbWpfrOyzRNJ30p4VU
 EqZ/EXd1eSX3rpNlq1ksi8BlEkaK/+VEnOx0R1VjCcvi4QldencQFzYdj2xhb33r
 7o9e0cnE/Y7RzV2ArwVn/41QBXscgKU2IqNeujcSsrsvLmI8YtRnSnvrORwV1zWz
 LDRArSt1DvMUq5JKvnLMkpbCXdTOWX2E5EwllVGx4w+8GdFzKgFKmt1q6mCtduCo
 M4ex82j29gJkDf3u0oZkikaMtOXTA+/deBKgbC1NbMDWMrW1fyl68ovXxjEVfs1A
 cUshn3jgSknL1MB4wb2ItTi+Qun6FHLaDhL+YuZuw/OytxIKltX5HzgWQYCrm4b4
 qXd6AvGmE/aJAR99t/2lC+Snhdmrw0DNQJCtxJ9HVI1sp16jYLlCFbPQiMAGAaXJ
 1i174ssXd6PQebNo0cWAr/yRMpRakqiSSWla/a0z71TGbMtL9WJp9f+ZnUWHjJAI
 9wf5pmnyst6TmB24xiGltGOBW42SCyx7mipoSUwEurqe1j5sPJs=
 =pA4R
 -----END PGP SIGNATURE-----

Merge 6.1.27 into android14-6.1-lts

Changes in 6.1.27
	um: Only disable SSE on clang to work around old GCC bugs
	phy: phy-brcm-usb: Utilize platform_get_irq_byname_optional()
	KVM: arm64: Retry fault if vma_lookup() results become invalid
	mm/mempolicy: fix use-after-free of VMA iterator
	mptcp: stops worker on unaccepted sockets at listener close
	mptcp: fix accept vs worker race
	wifi: brcmfmac: slab-out-of-bounds read in brcmf_get_assoc_ies()
	drm/fb-helper: set x/yres_virtual in drm_fb_helper_check_var
	gpiolib: acpi: Add a ignore wakeup quirk for Clevo NL5xNU
	bluetooth: Perform careful capability checks in hci_sock_ioctl()
	btrfs: fix uninitialized variable warnings
	USB: serial: option: add UNISOC vendor and TOZED LT70C product
	driver core: Don't require dynamic_debug for initcall_debug probe timing
	riscv: Move early dtb mapping into the fixmap region
	riscv: Do not set initial_boot_params to the linear address of the dtb
	riscv: No need to relocate the dtb as it lies in the fixmap region
	Linux 6.1.27

Change-Id: I25f8ead528e2f8f2694bce3eb16a3e02aa969c53
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Greg Kroah-Hartman 2023-06-08 11:27:30 +00:00
commit 9fcc8018f0
21 changed files with 312 additions and 180 deletions

Documentation/riscv/vm-layout.rst

@@ -47,7 +47,7 @@ RISC-V Linux Kernel SV39
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
- ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap
+ ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap
ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io
ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap
ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space
@@ -83,7 +83,7 @@ RISC-V Linux Kernel SV48
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
- ffff8d7ffee00000 | -114.5 TB | ffff8d7ffeffffff | 2 MB | fixmap
+ ffff8d7ffea00000 | -114.5 TB | ffff8d7ffeffffff | 6 MB | fixmap
ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io
ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap
ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space

Makefile

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 1
- SUBLEVEL = 26
+ SUBLEVEL = 27
EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth

arch/arm64/kvm/mmu.c

@@ -1451,6 +1451,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
+ /*
+ * Permission faults just need to update the existing leaf entry,
+ * and so normally don't require allocations from the memcache. The
+ * only exception to this is when dirty logging is enabled at runtime
+ * and a write fault needs to collapse a block entry into a table.
+ */
+ if (fault_status != FSC_PERM ||
+ (logging_active && write_fault)) {
+ ret = kvm_mmu_topup_memory_cache(memcache,
+ kvm_mmu_cache_min_pages(kvm));
+ if (ret)
+ return ret;
+ }
/*
* Let's check if we will get back a huge page backed by hugetlbfs, or
* get block mapping for device MMIO region.
@@ -1507,36 +1521,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
fault_ipa &= ~(vma_pagesize - 1);
gfn = fault_ipa >> PAGE_SHIFT;
- mmap_read_unlock(current->mm);
- /*
- * Permission faults just need to update the existing leaf entry,
- * and so normally don't require allocations from the memcache. The
- * only exception to this is when dirty logging is enabled at runtime
- * and a write fault needs to collapse a block entry into a table.
- */
- if (fault_status != FSC_PERM || (logging_active && write_fault)) {
- ret = kvm_mmu_topup_memory_cache(memcache,
- kvm_mmu_cache_min_pages(kvm));
- if (ret)
- return ret;
- }
- mmu_seq = vcpu->kvm->mmu_invalidate_seq;
/*
- * Ensure the read of mmu_invalidate_seq happens before we call
- * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
- * the page we just got a reference to gets unmapped before we have a
- * chance to grab the mmu_lock, which ensure that if the page gets
- * unmapped afterwards, the call to kvm_unmap_gfn will take it away
- * from us again properly. This smp_rmb() interacts with the smp_wmb()
- * in kvm_mmu_notifier_invalidate_<page|range_end>.
+ * Read mmu_invalidate_seq so that KVM can detect if the results of
+ * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to
+ * acquiring kvm->mmu_lock.
*
- * Besides, __gfn_to_pfn_memslot() instead of gfn_to_pfn_prot() is
- * used to avoid unnecessary overhead introduced to locate the memory
- * slot because it's always fixed even @gfn is adjusted for huge pages.
+ * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
+ * with the smp_wmb() in kvm_mmu_invalidate_end().
*/
- smp_rmb();
+ mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+ mmap_read_unlock(current->mm);
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
write_fault, &writable, NULL);
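
Note: the rewritten comment describes KVM's standard invalidation-race protocol: snapshot mmu_invalidate_seq while mmap_lock is still held, then re-check it under kvm->mmu_lock before installing the mapping. Below is a condensed, hypothetical sketch of that pattern using the 6.1 helper names; sketch_map_gfn() is an illustration, not the tree's code, and it assumes arm64's kvm->mmu_lock is an rwlock taken for read on the fault path.

#include <linux/kvm_host.h>
#include <linux/mm.h>

static int sketch_map_gfn(struct kvm *kvm, struct kvm_memory_slot *memslot,
			  gfn_t gfn, bool write_fault)
{
	unsigned long mmu_seq;
	bool writable;
	kvm_pfn_t pfn;

	mmap_read_lock(current->mm);
	/* ... vma_lookup() and mapping-size computation go here ... */

	/*
	 * Snapshot the sequence count *before* dropping mmap_lock; the
	 * unlock provides the smp_rmb() that pairs with the smp_wmb() in
	 * kvm_mmu_invalidate_end().
	 */
	mmu_seq = kvm->mmu_invalidate_seq;
	mmap_read_unlock(current->mm);

	pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
				   write_fault, &writable, NULL);

	read_lock(&kvm->mmu_lock);
	if (mmu_invalidate_retry(kvm, mmu_seq)) {
		/* An invalidation ran in the window: drop the pfn and
		 * replay the fault with fresh lookups.
		 */
		read_unlock(&kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		return -EAGAIN;
	}
	/* ... safe to install the stage-2 mapping here ... */
	read_unlock(&kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}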

arch/riscv/include/asm/fixmap.h

@@ -22,6 +22,14 @@
*/
enum fixed_addresses {
FIX_HOLE,
+ /*
+ * The fdt fixmap mapping must be PMD aligned and will be mapped
+ * using PMD entries in fixmap_pmd in 64-bit and a PGD entry in 32-bit.
+ */
+ FIX_FDT_END,
+ FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
+ /* Below fixmaps will be mapped using fixmap_pte */
FIX_PTE,
FIX_PMD,
FIX_PUD,
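
Note: with the asm-generic fixmap helpers that RISC-V uses, __fix_to_virt(x) == FIXADDR_TOP - (x << PAGE_SHIFT), so a higher enum index means a lower virtual address. FIX_FDT therefore names the base of a window of FIX_FDT_SIZE / PAGE_SIZE slots ending at FIX_FDT_END. A one-line hypothetical illustration (sketch_fdt_window_base() is not tree code):

#include <asm/fixmap.h>

/* Illustration only: base (lowest VA) of the FIX_FDT..FIX_FDT_END
 * window, the address that setup_vm() passes down further below.
 */
static void *sketch_fdt_window_base(void)
{
	return (void *)__fix_to_virt(FIX_FDT);
}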

arch/riscv/include/asm/pgtable.h

@@ -87,9 +87,13 @@
#define FIXADDR_TOP PCI_IO_START
#ifdef CONFIG_64BIT
- #define FIXADDR_SIZE PMD_SIZE
+ #define MAX_FDT_SIZE PMD_SIZE
+ #define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M)
+ #define FIXADDR_SIZE (PMD_SIZE + FIX_FDT_SIZE)
#else
- #define FIXADDR_SIZE PGDIR_SIZE
+ #define MAX_FDT_SIZE PGDIR_SIZE
+ #define FIX_FDT_SIZE MAX_FDT_SIZE
+ #define FIXADDR_SIZE (PGDIR_SIZE + FIX_FDT_SIZE)
#endif
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
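
Note: worked out for RV64 with 4 KiB pages (where PMD_SIZE is 2 MiB), these definitions account for the 2 MB to 6 MB fixmap change in the vm-layout tables above. The expansion below is arithmetic, not code from the tree:

/* RV64, 4 KiB pages:
 *   MAX_FDT_SIZE = PMD_SIZE                = 2 MiB
 *   FIX_FDT_SIZE = MAX_FDT_SIZE + SZ_2M    = 4 MiB
 *   FIXADDR_SIZE = PMD_SIZE + FIX_FDT_SIZE = 6 MiB
 *
 * The extra SZ_2M covers a DTB whose physical address is only rounded
 * down to a PMD boundary: an FDT of up to MAX_FDT_SIZE starting mid-PMD
 * still fits in two 2 MiB mappings (see create_fdt_early_page_table()).
 */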

arch/riscv/kernel/setup.c

@@ -278,12 +278,8 @@ void __init setup_arch(char **cmdline_p)
#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
#else
- if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa))))
- unflatten_device_tree();
- else
- pr_err("No DTB found in kernel mappings\n");
+ unflatten_device_tree();
#endif
early_init_fdt_scan_reserved_mem();
misc_mem_init();
init_resources();

arch/riscv/mm/init.c

@@ -57,7 +57,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
EXPORT_SYMBOL(empty_zero_page);
extern char _start[];
- #define DTB_EARLY_BASE_VA PGDIR_SIZE
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;
@@ -236,31 +235,22 @@ static void __init setup_bootmem(void)
set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
reserve_initrd_mem();
+ /*
+ * No allocation should be done before reserving the memory as defined
+ * in the device tree, otherwise the allocation could end up in a
+ * reserved region.
+ */
+ early_init_fdt_scan_reserved_mem();
/*
* If DTB is built in, no need to reserve its memblock.
* Otherwise, do reserve it but avoid using
* early_init_fdt_reserve_self() since __pa() does
* not work for DTB pointers that are fixmap addresses
*/
- if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) {
- /*
- * In case the DTB is not located in a memory region we won't
- * be able to locate it later on via the linear mapping and
- * get a segfault when accessing it via __va(dtb_early_pa).
- * To avoid this situation copy DTB to a memory region.
- * Note that memblock_phys_alloc will also reserve DTB region.
- */
- if (!memblock_is_memory(dtb_early_pa)) {
- size_t fdt_size = fdt_totalsize(dtb_early_va);
- phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE);
- void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size);
- memcpy(new_dtb_early_va, dtb_early_va, fdt_size);
- early_memunmap(new_dtb_early_va, fdt_size);
- _dtb_early_pa = new_dtb_early_pa;
- } else
- memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
- }
+ if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
+ memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
dma_contiguous_reserve(dma32_phys_limit);
if (IS_ENABLED(CONFIG_64BIT))
@@ -279,9 +269,6 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
- static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
- static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
- static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
@@ -626,9 +613,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp,
#define trampoline_pgd_next (pgtable_l5_enabled ? \
(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \
(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
- #define early_dtb_pgd_next (pgtable_l5_enabled ? \
- (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \
- (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
@@ -636,7 +620,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp,
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
- #define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
@@ -859,32 +842,28 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
* this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
* entry.
*/
- static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
+ static void __init create_fdt_early_page_table(pgd_t *pgdir,
+ uintptr_t fix_fdt_va,
+ uintptr_t dtb_pa)
{
- #ifndef CONFIG_BUILTIN_DTB
uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
- create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
- IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
- PGDIR_SIZE,
- IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
+ #ifndef CONFIG_BUILTIN_DTB
+ /* Make sure the fdt fixmap address is always aligned on PMD size */
+ BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));
- if (pgtable_l5_enabled)
- create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
- (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
- if (pgtable_l4_enabled)
- create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
- (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
- if (IS_ENABLED(CONFIG_64BIT)) {
- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
+ /* In 32-bit only, the fdt lies in its own PGD */
+ if (!IS_ENABLED(CONFIG_64BIT)) {
+ create_pgd_mapping(early_pg_dir, fix_fdt_va,
+ pa, MAX_FDT_SIZE, PAGE_KERNEL);
+ } else {
+ create_pmd_mapping(fixmap_pmd, fix_fdt_va,
+ pa, PMD_SIZE, PAGE_KERNEL);
- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
+ create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE,
pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
}
- dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+ dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1));
#else
/*
* For 64-bit kernel, __va can't be used since it would return a linear
@@ -1054,7 +1033,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
create_kernel_page_table(early_pg_dir, true);
/* Setup early mapping for FDT early scan */
- create_fdt_early_page_table(early_pg_dir, dtb_pa);
+ create_fdt_early_page_table(early_pg_dir,
+ __fix_to_virt(FIX_FDT), dtb_pa);
/*
* Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
@@ -1096,6 +1076,16 @@ static void __init setup_vm_final(void)
u64 i;
/* Setup swapper PGD for fixmap */
+ #if !defined(CONFIG_64BIT)
+ /*
+ * In 32-bit, the device tree lies in a pgd entry, so it must be copied
+ * directly in swapper_pg_dir in addition to the pgd entry that points
+ * to fixmap_pte.
+ */
+ unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT));
+ set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]);
+ #endif
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);

arch/x86/Makefile.um

@@ -3,9 +3,14 @@ core-y += arch/x86/crypto/
#
# Disable SSE and other FP/SIMD instructions to match normal x86
+ # This is required to work around issues in older LLVM versions, but breaks
+ # GCC versions < 11. See:
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652
#
+ ifeq ($(CONFIG_CC_IS_CLANG),y)
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
+ endif
ifeq ($(CONFIG_X86_32),y)
START := 0x8048000

drivers/base/dd.c

@@ -718,7 +718,12 @@ static int really_probe_debug(struct device *dev, struct device_driver *drv)
calltime = ktime_get();
ret = really_probe(dev, drv);
rettime = ktime_get();
- pr_debug("probe of %s returned %d after %lld usecs\n",
+ /*
+ * Don't change this to pr_debug() because that requires
+ * CONFIG_DYNAMIC_DEBUG and we want a simple 'initcall_debug' on the
+ * kernel commandline to print this all the time at the debug level.
+ */
+ printk(KERN_DEBUG "probe of %s returned %d after %lld usecs\n",
dev_name(dev), ret, ktime_us_delta(rettime, calltime));
return ret;
}
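
Note: the distinction the new comment draws is between dynamic-debug-gated and unconditional debug logging. A minimal, self-contained illustration (timing_report() is a hypothetical helper, not driver-core code):

#include <linux/printk.h>
#include <linux/types.h>

static void timing_report(const char *name, s64 usecs)
{
	/* Compiled out unless DEBUG is defined; with CONFIG_DYNAMIC_DEBUG
	 * it must additionally be enabled at runtime through dyndbg.
	 */
	pr_debug("probe of %s took %lld usecs\n", name, usecs);

	/* Always built in and always emitted at KERN_DEBUG level, so
	 * 'initcall_debug' output depends only on the console loglevel.
	 */
	printk(KERN_DEBUG "probe of %s took %lld usecs\n", name, usecs);
}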

drivers/gpio/gpiolib-acpi.c

@@ -1600,6 +1600,19 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = {
.ignore_interrupt = "AMDI0030:00@18",
},
},
+ {
+ /*
+ * Spurious wakeups from TP_ATTN# pin
+ * Found in BIOS 1.7.8
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627
+ */
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "ELAN0415:00@9",
+ },
+ },
{
/*
* Spurious wakeups from TP_ATTN# pin

drivers/gpu/drm/drm_fb_helper.c

@@ -1406,6 +1406,9 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
return -EINVAL;
}
+ var->xres_virtual = fb->width;
+ var->yres_virtual = fb->height;
/*
* Workaround for SDL 1.2, which is known to be setting all pixel format
* fields values to zero in some cases. We treat this situation as a

drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c

@@ -5888,6 +5888,11 @@ static s32 brcmf_get_assoc_ies(struct brcmf_cfg80211_info *cfg,
(struct brcmf_cfg80211_assoc_ielen_le *)cfg->extra_buf;
req_len = le32_to_cpu(assoc_info->req_len);
resp_len = le32_to_cpu(assoc_info->resp_len);
+ if (req_len > WL_EXTRA_BUF_MAX || resp_len > WL_EXTRA_BUF_MAX) {
+ bphy_err(drvr, "invalid lengths in assoc info: req %u resp %u\n",
+ req_len, resp_len);
+ return -EINVAL;
+ }
if (req_len) {
err = brcmf_fil_iovar_data_get(ifp, "assoc_req_ies",
cfg->extra_buf,
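
Note: the added check applies the usual rule for device- or firmware-supplied length fields: validate against the backing buffer before any copy or parse, or a confused/malicious firmware drives a slab-out-of-bounds read exactly like the one fixed here. A hypothetical stand-alone helper showing the same pattern (copy_fw_ies() and its parameters are illustrative, not driver code):

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

static int copy_fw_ies(void *dst, size_t dst_len,
		       const void *fw_buf, u32 reported_len)
{
	/* Reject rather than silently clamp: an out-of-range length from
	 * firmware is a bug or an attack, and clamping would hide it.
	 */
	if (reported_len > dst_len)
		return -EINVAL;

	memcpy(dst, fw_buf, reported_len);
	return 0;
}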

drivers/phy/broadcom/phy-brcm-usb.c

@@ -445,9 +445,9 @@ static int brcm_usb_phy_dvr_init(struct platform_device *pdev,
priv->suspend_clk = NULL;
}
- priv->wake_irq = platform_get_irq_byname(pdev, "wake");
+ priv->wake_irq = platform_get_irq_byname_optional(pdev, "wake");
if (priv->wake_irq < 0)
- priv->wake_irq = platform_get_irq_byname(pdev, "wakeup");
+ priv->wake_irq = platform_get_irq_byname_optional(pdev, "wakeup");
if (priv->wake_irq >= 0) {
err = devm_request_irq(dev, priv->wake_irq,
brcm_usb_phy_wake_isr, 0,
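
Note: platform_get_irq_byname_optional() behaves like platform_get_irq_byname() but returns a negative errno (typically -ENXIO) without logging when the IRQ is absent, which is the point of the change: a missing wake IRQ is a valid configuration here, not an error worth printing. A minimal sketch of the fallback pattern (the helper name is hypothetical):

#include <linux/platform_device.h>

static int sketch_get_wake_irq(struct platform_device *pdev)
{
	int irq;

	/* Neither call prints an error if the name is absent. */
	irq = platform_get_irq_byname_optional(pdev, "wake");
	if (irq < 0)
		irq = platform_get_irq_byname_optional(pdev, "wakeup");

	return irq;	/* >= 0, or a negative errno such as -ENXIO */
}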

drivers/usb/serial/option.c

@@ -595,6 +595,11 @@ static void option_instat_callback(struct urb *urb);
#define SIERRA_VENDOR_ID 0x1199
#define SIERRA_PRODUCT_EM9191 0x90d3
+ /* UNISOC (Spreadtrum) products */
+ #define UNISOC_VENDOR_ID 0x1782
+ /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */
+ #define TOZED_PRODUCT_LT70C 0x4055
/* Device flags */
/* Highest interface number which can be used with NCTRL() and RSVD() */
@@ -2225,6 +2230,7 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) },
{ } /* Terminating entry */
};
MODULE_DEVICE_TABLE(usb, option_ids);

fs/btrfs/send.c

@@ -1658,7 +1658,7 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
int left_ret;
int right_ret;
u64 left_gen;
- u64 right_gen;
+ u64 right_gen = 0;
struct btrfs_inode_info info;
ret = get_inode_info(sctx->send_root, ino, &info);

fs/btrfs/volumes.c

@@ -2631,7 +2631,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
struct super_block *sb = fs_info->sb;
struct rcu_string *name;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
- struct btrfs_fs_devices *seed_devices;
+ struct btrfs_fs_devices *seed_devices = NULL;
u64 orig_super_total_bytes;
u64 orig_super_num_devices;
int ret = 0;

mm/mempolicy.c

@@ -784,70 +784,56 @@ static int vma_replace_policy(struct vm_area_struct *vma,
return err;
}
- /* Step 2: apply policy to a range and do splits. */
- static int mbind_range(struct mm_struct *mm, unsigned long start,
- unsigned long end, struct mempolicy *new_pol)
+ /* Split or merge the VMA (if required) and apply the new policy */
+ static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ struct vm_area_struct **prev, unsigned long start,
+ unsigned long end, struct mempolicy *new_pol)
{
- MA_STATE(mas, &mm->mm_mt, start, start);
- struct vm_area_struct *prev;
- struct vm_area_struct *vma;
- int err = 0;
+ struct vm_area_struct *merged;
+ unsigned long vmstart, vmend;
pgoff_t pgoff;
+ int err;
- prev = mas_prev(&mas, 0);
- if (unlikely(!prev))
- mas_set(&mas, start);
- vma = mas_find(&mas, end - 1);
- if (WARN_ON(!vma))
- return 0;
- if (start > vma->vm_start)
- prev = vma;
- for (; vma; vma = mas_next(&mas, end - 1)) {
- unsigned long vmstart = max(start, vma->vm_start);
- unsigned long vmend = min(end, vma->vm_end);
- if (mpol_equal(vma_policy(vma), new_pol))
- goto next;
- pgoff = vma->vm_pgoff +
- ((vmstart - vma->vm_start) >> PAGE_SHIFT);
- prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff,
- new_pol, vma->vm_userfaultfd_ctx,
- anon_vma_name(vma));
- if (prev) {
- /* vma_merge() invalidated the mas */
- mas_pause(&mas);
- vma = prev;
- goto replace;
- }
- if (vma->vm_start != vmstart) {
- err = split_vma(vma->vm_mm, vma, vmstart, 1);
- if (err)
- goto out;
- /* split_vma() invalidated the mas */
- mas_pause(&mas);
- }
- if (vma->vm_end != vmend) {
- err = split_vma(vma->vm_mm, vma, vmend, 0);
- if (err)
- goto out;
- /* split_vma() invalidated the mas */
- mas_pause(&mas);
- }
- replace:
- err = vma_replace_policy(vma, new_pol);
- if (err)
- goto out;
- next:
- prev = vma;
+ vmend = min(end, vma->vm_end);
+ if (start > vma->vm_start) {
+ *prev = vma;
+ vmstart = start;
+ } else {
+ vmstart = vma->vm_start;
+ }
- out:
- return err;
+ if (mpol_equal(vma_policy(vma), new_pol))
+ return 0;
+ pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT);
+ merged = vma_merge(vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags,
+ vma->anon_vma, vma->vm_file, pgoff, new_pol,
+ vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+ if (merged) {
+ *prev = merged;
+ /* vma_merge() invalidated the mas */
+ mas_pause(&vmi->mas);
+ return vma_replace_policy(merged, new_pol);
+ }
+ if (vma->vm_start != vmstart) {
+ err = split_vma(vma->vm_mm, vma, vmstart, 1);
+ if (err)
+ return err;
+ /* split_vma() invalidated the mas */
+ mas_pause(&vmi->mas);
+ }
+ if (vma->vm_end != vmend) {
+ err = split_vma(vma->vm_mm, vma, vmend, 0);
+ if (err)
+ return err;
+ /* split_vma() invalidated the mas */
+ mas_pause(&vmi->mas);
+ }
+ *prev = vma;
+ return vma_replace_policy(vma, new_pol);
}
/* Set the process memory policy */
@@ -1259,6 +1245,8 @@ static long do_mbind(unsigned long start, unsigned long len,
nodemask_t *nmask, unsigned long flags)
{
struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma, *prev;
+ struct vma_iterator vmi;
struct mempolicy *new;
unsigned long end;
int err;
@@ -1328,7 +1316,13 @@ static long do_mbind(unsigned long start, unsigned long len,
goto up_out;
}
- err = mbind_range(mm, start, end, new);
+ vma_iter_init(&vmi, mm, start);
+ prev = vma_prev(&vmi);
+ for_each_vma_range(vmi, vma, end) {
+ err = mbind_range(&vmi, vma, &prev, start, end, new);
+ if (err)
+ break;
+ }
if (!err) {
int nr_failed = 0;
@@ -1489,10 +1483,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
unsigned long, home_node, unsigned long, flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct mempolicy *new;
- unsigned long vmstart;
- unsigned long vmend;
unsigned long end;
int err = -ENOENT;
VMA_ITERATOR(vmi, mm, start);
@@ -1521,9 +1513,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
if (end == start)
return 0;
mmap_write_lock(mm);
+ prev = vma_prev(&vmi);
for_each_vma_range(vmi, vma, end) {
- vmstart = max(start, vma->vm_start);
- vmend = min(end, vma->vm_end);
new = mpol_dup(vma_policy(vma));
if (IS_ERR(new)) {
err = PTR_ERR(new);
@@ -1547,7 +1538,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
}
new->home_node = home_node;
- err = mbind_range(mm, vmstart, vmend, new);
+ err = mbind_range(&vmi, vma, &prev, start, end, new);
mpol_put(new);
if (err)
break;
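
Note: the shape the fix converges on is one VMA iterator owned by the caller, with *prev threaded through the helper so that vma_merge()/split_vma() invalidations stay coordinated with the loop. A hypothetical caller-side sketch of that pattern (walk_range() and apply_one() stand in for do_mbind() and mbind_range(); they are not tree code):

#include <linux/mempolicy.h>
#include <linux/mm.h>

static int apply_one(struct vma_iterator *vmi, struct vm_area_struct *vma,
		     struct vm_area_struct **prev, unsigned long start,
		     unsigned long end, struct mempolicy *pol);

static int walk_range(struct mm_struct *mm, unsigned long start,
		      unsigned long end, struct mempolicy *pol)
{
	struct vm_area_struct *vma, *prev;
	VMA_ITERATOR(vmi, mm, start);
	int err = 0;

	prev = vma_prev(&vmi);	/* may be NULL at the first VMA */
	for_each_vma_range(vmi, vma, end) {
		err = apply_one(&vmi, vma, &prev, start, end, pol);
		if (err)
			break;
	}
	return err;
}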

net/bluetooth/hci_sock.c

@@ -1003,7 +1003,14 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
if (hci_sock_gen_cookie(sk)) {
struct sk_buff *skb;
- if (capable(CAP_NET_ADMIN))
+ /* Perform careful checks before setting the HCI_SOCK_TRUSTED
+ * flag. Make sure that not only the current task but also
+ * the socket opener has the required capability, since
+ * privileged programs can be tricked into making ioctl calls
+ * on HCI sockets, and the socket should not be marked as
+ * trusted simply because the ioctl caller is privileged.
+ */
+ if (sk_capable(sk, CAP_NET_ADMIN))
hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
/* Send event to monitor */
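
Note: capable() consults only the current task's credentials, while sk_capable() additionally requires that the socket's opener held the capability (recorded in the file at open time), which is what defeats the confused-deputy ioctl scenario described in the comment. A minimal illustrative helper (may_trust_hci_sock() is hypothetical, not bluetooth code):

#include <linux/capability.h>
#include <net/sock.h>

static bool may_trust_hci_sock(struct sock *sk)
{
	/* True only if both the task that opened the socket and the
	 * task issuing the ioctl hold CAP_NET_ADMIN.
	 */
	return sk_capable(sk, CAP_NET_ADMIN);
}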

net/mptcp/protocol.c

@@ -2330,7 +2330,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- bool need_push, dispose_it;
+ bool dispose_it, need_push = false;
+ /* If the first subflow moved to a close state before accept, e.g. due
+ * to an incoming reset, mptcp either:
+ * - if either the subflow or the msk are dead, destroy the context
+ * (the subflow socket is deleted by inet_child_forget) and the msk
+ * - otherwise do nothing at the moment and take action at accept and/or
+ * listener shutdown - user-space must be able to accept() the closed
+ * socket.
+ */
+ if (msk->in_accept_queue && msk->first == ssk) {
+ if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
+ return;
+ /* ensure later check in mptcp_worker() will dispose the msk */
+ sock_set_flag(sk, SOCK_DEAD);
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ mptcp_subflow_drop_ctx(ssk);
+ goto out_release;
+ }
dispose_it = !msk->subflow || ssk != msk->subflow->sk;
if (dispose_it)
@@ -2366,25 +2385,21 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (!inet_csk(ssk)->icsk_ulp_ops) {
WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
- } else if (msk->in_accept_queue && msk->first == ssk) {
- /* if the first subflow moved to a close state, e.g. due to
- * incoming reset and we reach here before inet_child_forget()
- * the TCP stack could later try to close it via
- * inet_csk_listen_stop(), or deliver it to the user space via
- * accept().
- * We can't delete the subflow - or risk a double free - nor let
- * the msk survive - or will be leaked in the non accept scenario:
- * fallback and let TCP cope with the subflow cleanup.
- */
- WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
- mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN) {
tcp_set_state(ssk, TCP_CLOSE);
+ mptcp_subflow_queue_clean(sk, ssk);
inet_csk_listen_stop(ssk);
}
__tcp_close(ssk, 0);
/* close acquired an extra ref */
__sock_put(ssk);
}
+ out_release:
release_sock(ssk);
sock_put(ssk);
@@ -2439,21 +2454,14 @@ static void __mptcp_close_subflow(struct sock *sk)
mptcp_close_ssk(sk, ssk, subflow);
}
- /* if the MPC subflow has been closed before the msk is accepted,
- * msk will never be accept-ed, close it now
- */
- if (!msk->first && msk->in_accept_queue) {
- sock_set_flag(sk, SOCK_DEAD);
- inet_sk_state_store(sk, TCP_CLOSE);
- }
}
- static bool mptcp_check_close_timeout(const struct sock *sk)
+ static bool mptcp_should_close(const struct sock *sk)
{
s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
struct mptcp_subflow_context *subflow;
- if (delta >= TCP_TIMEWAIT_LEN)
+ if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
return true;
/* if all subflows are in closed status don't bother with additional
@@ -2661,7 +2669,7 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_check_close_timeout(sk)) {
+ if (mptcp_should_close(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
}
@@ -2906,6 +2914,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
sock_put(sk);
}
+ void __mptcp_unaccepted_force_close(struct sock *sk)
+ {
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_do_fastclose(sk);
+ __mptcp_destroy_sock(sk);
+ }
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
/* Concurrent splices from sk_receive_queue into receive_queue will
@@ -3753,6 +3769,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
+ /* Do late cleanup for the first subflow as necessary. Also
+ * deal with bad peers not doing a complete shutdown.
+ */
+ if (msk->first &&
+ unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+ __mptcp_close_ssk(newsk, msk->first,
+ mptcp_subflow_ctx(msk->first), 0);
+ if (unlikely(list_empty(&msk->conn_list)))
+ inet_sk_state_store(newsk, TCP_CLOSE);
+ }
release_sock(newsk);
}

net/mptcp/protocol.h

@@ -615,10 +615,12 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
+ void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
+ void __mptcp_unaccepted_force_close(struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);

net/mptcp/subflow.c

@@ -661,9 +661,12 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
if (!ctx)
return;
- subflow_ulp_fallback(ssk, ctx);
- if (ctx->conn)
- sock_put(ctx->conn);
+ list_del(&mptcp_subflow_ctx(ssk)->node);
+ if (inet_csk(ssk)->icsk_ulp_ops) {
+ subflow_ulp_fallback(ssk, ctx);
+ if (ctx->conn)
+ sock_put(ctx->conn);
+ }
kfree_rcu(ctx, rcu);
}
@@ -1758,6 +1761,77 @@ static void subflow_state_change(struct sock *sk)
}
}
+ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
+ {
+ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+ struct mptcp_sock *msk, *next, *head = NULL;
+ struct request_sock *req;
+ struct sock *sk;
+ /* build a list of all unaccepted mptcp sockets */
+ spin_lock_bh(&queue->rskq_lock);
+ for (req = queue->rskq_accept_head; req; req = req->dl_next) {
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk = req->sk;
+ if (!sk_is_mptcp(ssk))
+ continue;
+ subflow = mptcp_subflow_ctx(ssk);
+ if (!subflow || !subflow->conn)
+ continue;
+ /* skip if already in list */
+ sk = subflow->conn;
+ msk = mptcp_sk(sk);
+ if (msk->dl_next || msk == head)
+ continue;
+ sock_hold(sk);
+ msk->dl_next = head;
+ head = msk;
+ }
+ spin_unlock_bh(&queue->rskq_lock);
+ if (!head)
+ return;
+ /* can't acquire the msk socket lock under the subflow one,
+ * or will cause ABBA deadlock
+ */
+ release_sock(listener_ssk);
+ for (msk = head; msk; msk = next) {
+ sk = (struct sock *)msk;
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+ next = msk->dl_next;
+ msk->dl_next = NULL;
+ __mptcp_unaccepted_force_close(sk);
+ release_sock(sk);
+ /* lockdep will report a false positive ABBA deadlock
+ * between cancel_work_sync and the listener socket.
+ * The involved locks belong to different sockets WRT
+ * the existing AB chain.
+ * Using a per socket key is problematic as key
+ * deregistration requires process context and must be
+ * performed at socket disposal time, in atomic
+ * context.
+ * Just tell lockdep to consider the listener socket
+ * released here.
+ */
+ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
+ mptcp_cancel_work(sk);
+ mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+ sock_put(sk);
+ }
+ /* we are still under the listener msk socket lock */
+ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+ }
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);