android_kernel_asus_sm8350/net/ipv4/tcp_ipv4.c
Michael Bestas c066ac93be
Merge tag 'ASB-2023-02-05_11-5.4' of https://android.googlesource.com/kernel/common into android13-5.4-lahaina
https://source.android.com/docs/security/bulletin/2023-02-01
CVE-2022-39189
CVE-2022-39842
CVE-2022-41222
CVE-2023-20937
CVE-2023-20938
CVE-2022-0850

* tag 'ASB-2023-02-05_11-5.4' of https://android.googlesource.com/kernel/common:
  ANDROID: ABI: Cuttlefish Symbol update
  UPSTREAM: media: dvb-core: Fix UAF due to refcount races at releasing
  ANDROID: abi_gki_aarch64_qcom: Add hrtimer_sleeper_start_expires
  UPSTREAM: ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF
  ANDROID: Revert "tracing/ring-buffer: Have polling block on watermark"
  UPSTREAM: usb: gadget: f_hid: fix f_hidg lifetime vs cdev
  UPSTREAM: usb: gadget: f_hid: optional SETUP/SET_REPORT mode
  ANDROID: add TEST_MAPPING for net/, include/net
  UPSTREAM: nfp: fix use-after-free in area_cache_get()
  UPSTREAM: proc: avoid integer type confusion in get_proc_long
  UPSTREAM: proc: proc_skip_spaces() shouldn't think it is working on C strings
  ANDROID: usb: f_accessory: Check buffer size when initialised via composite
  BACKPORT: mm: don't be stuck to rmap lock on reclaim path
  ANDROID: Add more hvc devices for virtio-console.
  Revert "mmc: sdhci: Fix voltage switch delay"
  ANDROID: gki_defconfig: add CONFIG_FUNCTION_ERROR_INJECTION
  Linux 5.4.226
  ipc/sem: Fix dangling sem_array access in semtimedop race
  v4l2: don't fall back to follow_pfn() if pin_user_pages_fast() fails
  proc: proc_skip_spaces() shouldn't think it is working on C strings
  proc: avoid integer type confusion in get_proc_long
  mmc: sdhci: Fix voltage switch delay
  mmc: sdhci: use FIELD_GET for preset value bit masks
  char: tpm: Protect tpm_pm_suspend with locks
  Revert "clocksource/drivers/riscv: Events are stopped during CPU suspend"
  x86/ioremap: Fix page aligned size calculation in __ioremap_caller()
  Bluetooth: L2CAP: Fix accepting connection request for invalid SPSM
  x86/pm: Add enumeration check before spec MSRs save/restore setup
  x86/tsx: Add a feature bit for TSX control MSR support
  nvme: ensure subsystem reset is single threaded
  nvme: restrict management ioctls to admin
  epoll: check for events when removing a timed out thread from the wait queue
  epoll: call final ep_events_available() check under the lock
  tracing/ring-buffer: Have polling block on watermark
  ipv4: Fix route deletion when nexthop info is not specified
  ipv4: Handle attempt to delete multipath route when fib_info contains an nh reference
  selftests: net: fix nexthop warning cleanup double ip typo
  selftests: net: add delete nexthop route warning test
  Kconfig.debug: provide a little extra FRAME_WARN leeway when KASAN is enabled
  parisc: Increase FRAME_WARN to 2048 bytes on parisc
  xtensa: increase size of gcc stack frame check
  parisc: Increase size of gcc stack frame check
  iommu/vt-d: Fix PCI device refcount leak in dmar_dev_scope_init()
  pinctrl: single: Fix potential division by zero
  ASoC: ops: Fix bounds check for _sx controls
  mm: Fix '.data.once' orphan section warning
  arm64: errata: Fix KVM Spectre-v2 mitigation selection for Cortex-A57/A72
  arm64: Fix panic() when Spectre-v2 causes Spectre-BHB to re-allocate KVM vectors
  tracing: Free buffers when a used dynamic event is removed
  mmc: sdhci-sprd: Fix no reset data and command after voltage switch
  mmc: sdhci-esdhc-imx: correct CQHCI exit halt state check
  mmc: core: Fix ambiguous TRIM and DISCARD arg
  mmc: mmc_test: Fix removal of debugfs file
  pinctrl: intel: Save and restore pins in "direct IRQ" mode
  x86/bugs: Make sure MSR_SPEC_CTRL is updated properly upon resume from S3
  nilfs2: fix NULL pointer dereference in nilfs_palloc_commit_free_entry()
  tools/vm/slabinfo-gnuplot: use "grep -E" instead of "egrep"
  error-injection: Add prompt for function error injection
  net/mlx5: DR, Fix uninitialized var warning
  hwmon: (coretemp) fix pci device refcount leak in nv1a_ram_new()
  hwmon: (coretemp) Check for null before removing sysfs attrs
  net: ethernet: renesas: ravb: Fix promiscuous mode after system resumed
  sctp: fix memory leak in sctp_stream_outq_migrate()
  packet: do not set TP_STATUS_CSUM_VALID on CHECKSUM_COMPLETE
  net: tun: Fix use-after-free in tun_detach()
  afs: Fix fileserver probe RTT handling
  net: hsr: Fix potential use-after-free
  dsa: lan9303: Correct stat name
  net: ethernet: nixge: fix NULL dereference
  net/9p: Fix a potential socket leak in p9_socket_open
  net: net_netdev: Fix error handling in ntb_netdev_init_module()
  net: phy: fix null-ptr-deref while probe() failed
  wifi: cfg80211: fix buffer overflow in elem comparison
  qlcnic: fix sleep-in-atomic-context bugs caused by msleep
  can: cc770: cc770_isa_probe(): add missing free_cc770dev()
  can: sja1000_isa: sja1000_isa_probe(): add missing free_sja1000dev()
  net/mlx5e: Fix use-after-free when reverting termination table
  net/mlx5: Fix uninitialized variable bug in outlen_write()
  of: property: decrement node refcount in of_fwnode_get_reference_args()
  hwmon: (ibmpex) Fix possible UAF when ibmpex_register_bmc() fails
  hwmon: (i5500_temp) fix missing pci_disable_device()
  scripts/faddr2line: Fix regression in name resolution on ppc64le
  iio: light: rpr0521: add missing Kconfig dependencies
  iio: health: afe4404: Fix oob read in afe4404_[read|write]_raw
  iio: health: afe4403: Fix oob read in afe4403_read_raw
  btrfs: qgroup: fix sleep from invalid context bug in btrfs_qgroup_inherit()
  drm/amdgpu: Partially revert "drm/amdgpu: update drm_display_info correctly when the edid is read"
  drm/amdgpu: update drm_display_info correctly when the edid is read
  btrfs: move QUOTA_ENABLED check to rescan_should_stop from btrfs_qgroup_rescan_worker
  spi: spi-imx: Fix spi_bus_clk if requested clock is higher than input clock
  btrfs: free btrfs_path before copying inodes to userspace
  fuse: lock inode unconditionally in fuse_fallocate()
  drm/i915: fix TLB invalidation for Gen12 video and compute engines
  drm/amdgpu: always register an MMU notifier for userptr
  drm/amd/dc/dce120: Fix audio register mapping, stop triggering KASAN
  btrfs: sysfs: normalize the error handling branch in btrfs_init_sysfs()
  btrfs: free btrfs_path before copying subvol info to userspace
  btrfs: free btrfs_path before copying fspath to userspace
  btrfs: free btrfs_path before copying root refs to userspace
  binder: Gracefully handle BINDER_TYPE_FDA objects with num_fds=0
  binder: Address corner cases in deferred copy and fixup
  binder: fix pointer cast warning
  binder: defer copies of pre-patched txn data
  binder: read pre-translated fds from sender buffer
  binder: avoid potential data leakage when copying txn
  dm integrity: flush the journal on suspend
  net: usb: qmi_wwan: add Telit 0x103a composition
  tcp: configurable source port perturb table size
  platform/x86: hp-wmi: Ignore Smart Experience App event
  platform/x86: acer-wmi: Enable SW_TABLET_MODE on Switch V 10 (SW5-017)
  platform/x86: asus-wmi: add missing pci_dev_put() in asus_wmi_set_xusb2pr()
  xen/platform-pci: add missing free_irq() in error path
  serial: 8250: 8250_omap: Avoid RS485 RTS glitch on ->set_termios()
  ASoC: Intel: bytcht_es8316: Add quirk for the Nanote UMPC-01
  Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode
  gcov: clang: fix the buffer overflow issue
  nilfs2: fix nilfs_sufile_mark_dirty() not set segment usage as dirty
  firmware: coreboot: Register bus in module init
  firmware: google: Release devices before unregistering the bus
  ceph: avoid putting the realm twice when decoding snaps fails
  ceph: do not update snapshot context when there is no new snapshot
  iio: pressure: ms5611: fixed value compensation bug
  iio: ms5611: Simplify IO callback parameters
  nios2: add FORCE for vmlinuz.gz
  init/Kconfig: fix CC_HAS_ASM_GOTO_TIED_OUTPUT test with dash
  iio: core: Fix entry not deleted when iio_register_sw_trigger_type() fails
  iio: light: apds9960: fix wrong register for gesture gain
  arm64: dts: rockchip: lower rk3399-puma-haikou SD controller clock frequency
  usb: dwc3: exynos: Fix remove() function
  lib/vdso: use "grep -E" instead of "egrep"
  s390/crashdump: fix TOD programmable field size
  net: thunderx: Fix the ACPI memory leak
  nfc: st-nci: fix memory leaks in EVT_TRANSACTION
  nfc: st-nci: fix incorrect validating logic in EVT_TRANSACTION
  s390/dasd: fix no record found for raw_track_access
  dccp/tcp: Reset saddr on failure after inet6?_hash_connect().
  bnx2x: fix pci device refcount leak in bnx2x_vf_is_pcie_pending()
  regulator: twl6030: re-add TWL6032_SUBCLASS
  NFC: nci: fix memory leak in nci_rx_data_packet()
  xfrm: Fix ignored return value in xfrm6_init()
  tipc: check skb_linearize() return value in tipc_disc_rcv()
  tipc: add an extra conn_get in tipc_conn_alloc
  tipc: set con sock in tipc_conn_alloc
  net/mlx5: Fix FW tracer timestamp calculation
  Drivers: hv: vmbus: fix possible memory leak in vmbus_device_register()
  Drivers: hv: vmbus: fix double free in the error path of vmbus_add_channel_work()
  nfp: add port from netdev validation for EEPROM access
  net: pch_gbe: fix pci device refcount leak while module exiting
  net/qla3xxx: fix potential memleak in ql3xxx_send()
  net/mlx4: Check retval of mlx4_bitmap_init
  ARM: mxs: fix memory leak in mxs_machine_init()
  9p/fd: fix issue of list_del corruption in p9_fd_cancel()
  net: pch_gbe: fix potential memleak in pch_gbe_tx_queue()
  nfc/nci: fix race with opening and closing
  net: liquidio: simplify if expression
  ARM: dts: at91: sam9g20ek: enable udc vbus gpio pinctrl
  tee: optee: fix possible memory leak in optee_register_device()
  bus: sunxi-rsb: Support atomic transfers
  regulator: core: fix UAF in destroy_regulator()
  regulator: core: fix kobject release warning and memory leak in regulator_register()
  ASoC: sgtl5000: Reset the CHIP_CLK_CTRL reg on remove
  ARM: dts: am335x-pcm-953: Define fixed regulators in root node
  af_key: Fix send_acquire race with pfkey_register
  MIPS: pic32: treat port as signed integer
  RISC-V: vdso: Do not add missing symbols to version section in linker script
  arm64/syscall: Include asm/ptrace.h in syscall_wrapper header.
  block, bfq: fix null pointer dereference in bfq_bio_bfqg()
  drm: panel-orientation-quirks: Add quirk for Acer Switch V 10 (SW5-017)
  spi: stm32: fix stm32_spi_prepare_mbr() that halves spi clk for every run
  wifi: mac80211: Fix ack frame idr leak when mesh has no route
  audit: fix undefined behavior in bit shift for AUDIT_BIT
  wifi: mac80211_hwsim: fix debugfs attribute ps with rc table support
  wifi: mac80211: fix memory free error when registering wiphy fail
  Revert "can: af_can: fix NULL pointer dereference in can_rx_register()"
  Linux 5.4.225
  ntfs: check overflow when iterating ATTR_RECORDs
  ntfs: fix out-of-bounds read in ntfs_attr_find()
  ntfs: fix use-after-free in ntfs_attr_find()
  mm: fs: initialize fsdata passed to write_begin/write_end interface
  9p/trans_fd: always use O_NONBLOCK read/write
  gfs2: Switch from strlcpy to strscpy
  gfs2: Check sb_bsize_shift after reading superblock
  9p: trans_fd/p9_conn_cancel: drop client lock earlier
  kcm: close race conditions on sk_receive_queue
  bpf, test_run: Fix alignment problem in bpf_prog_test_run_skb()
  kcm: avoid potential race in kcm_tx_work
  tcp: cdg: allow tcp_cdg_release() to be called multiple times
  macvlan: enforce a consistent minimal mtu
  Input: i8042 - fix leaking of platform device on module removal
  kprobes: Skip clearing aggrprobe's post_handler in kprobe-on-ftrace case
  scsi: target: tcm_loop: Fix possible name leak in tcm_loop_setup_hba_bus()
  ring-buffer: Include dropped pages in counting dirty patches
  serial: 8250: Flush DMA Rx on RLSI
  misc/vmw_vmci: fix an infoleak in vmci_host_do_receive_datagram()
  docs: update mediator contact information in CoC doc
  mmc: sdhci-pci: Fix possible memory leak caused by missing pci_dev_put()
  mmc: sdhci-pci-o2micro: fix card detect fail issue caused by CD# debounce timeout
  mmc: core: properly select voltage range without power cycle
  scsi: zfcp: Fix double free of FSF request when qdio send fails
  Input: iforce - invert valid length check when fetching device IDs
  serial: 8250_lpss: Configure DMA also w/o DMA filter
  serial: 8250: Fall back to non-DMA Rx if IIR_RDI occurs
  dm ioctl: fix misbehavior if list_versions races with module loading
  iio: pressure: ms5611: changed hardcoded SPI speed to value limited
  iio: trigger: sysfs: fix possible memory leak in iio_sysfs_trig_init()
  iio: adc: at91_adc: fix possible memory leak in at91_adc_allocate_trigger()
  usb: chipidea: fix deadlock in ci_otg_del_timer
  usb: add NO_LPM quirk for Realforce 87U Keyboard
  USB: serial: option: add Fibocom FM160 0x0111 composition
  USB: serial: option: add u-blox LARA-L6 modem
  USB: serial: option: add u-blox LARA-R6 00B modem
  USB: serial: option: remove old LARA-R6 PID
  USB: serial: option: add Sierra Wireless EM9191
  speakup: fix a segfault caused by switching consoles
  slimbus: stream: correct presence rate frequencies
  Revert "usb: dwc3: disable USB core PHY management"
  ALSA: usb-audio: Drop snd_BUG_ON() from snd_usbmidi_output_open()
  ring_buffer: Do not deactivate non-existant pages
  ftrace: Fix null pointer dereference in ftrace_add_mod()
  ftrace: Optimize the allocation for mcount entries
  ftrace: Fix the possible incorrect kernel message
  cifs: add check for returning value of SMB2_set_info_init
  net: thunderbolt: Fix error handling in tbnet_init()
  cifs: Fix wrong return value checking when GETFLAGS
  net/x25: Fix skb leak in x25_lapb_receive_frame()
  platform/x86/intel: pmc: Don't unconditionally attach Intel PMC when virtualized
  drbd: use after free in drbd_create_device()
  xen/pcpu: fix possible memory leak in register_pcpu()
  bnxt_en: Remove debugfs when pci_register_driver failed
  net: caif: fix double disconnect client in chnl_net_open()
  net: macvlan: Use built-in RCU list checking
  mISDN: fix misuse of put_device() in mISDN_register_device()
  net: liquidio: release resources when liquidio driver open failed
  mISDN: fix possible memory leak in mISDN_dsp_element_register()
  net: bgmac: Drop free_netdev() from bgmac_enet_remove()
  ata: libata-transport: fix double ata_host_put() in ata_tport_add()
  arm64: dts: imx8mn: Fix NAND controller size-cells
  arm64: dts: imx8mm: Fix NAND controller size-cells
  pinctrl: devicetree: fix null pointer dereferencing in pinctrl_dt_to_map
  parport_pc: Avoid FIFO port location truncation
  siox: fix possible memory leak in siox_device_add()
  block: sed-opal: kmalloc the cmd/resp buffers
  ASoC: soc-utils: Remove __exit for snd_soc_util_exit()
  tty: n_gsm: fix sleep-in-atomic-context bug in gsm_control_send
  serial: imx: Add missing .thaw_noirq hook
  serial: 8250: omap: Flush PM QOS work on remove
  serial: 8250: omap: Fix unpaired pm_runtime_put_sync() in omap8250_remove()
  serial: 8250_omap: remove wait loop from Errata i202 workaround
  ASoC: core: Fix use-after-free in snd_soc_exit()
  spi: stm32: Print summary 'callbacks suppressed' message
  ASoC: codecs: jz4725b: Fix spelling mistake "Sourc" -> "Source", "Routee" -> "Route"
  Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm
  btrfs: remove pointless and double ulist frees in error paths of qgroup tests
  drm/imx: imx-tve: Fix return type of imx_tve_connector_mode_valid
  i2c: i801: add lis3lv02d's I2C address for Vostro 5568
  NFSv4: Retry LOCK on OLD_STATEID during delegation return
  selftests/intel_pstate: fix build for ARCH=x86_64
  selftests/futex: fix build for clang
  ASoC: codecs: jz4725b: fix capture selector naming
  ASoC: codecs: jz4725b: use right control for Capture Volume
  ASoC: codecs: jz4725b: fix reported volume for Master ctl
  ASoC: codecs: jz4725b: add missed Line In power control bit
  spi: intel: Fix the offset to get the 64K erase opcode
  ASoC: wm8962: Add an event handler for TEMP_HP and TEMP_SPK
  ASoC: wm8997: Revert "ASoC: wm8997: Fix PM disable depth imbalance in wm8997_probe"
  ASoC: wm5110: Revert "ASoC: wm5110: Fix PM disable depth imbalance in wm5110_probe"
  ASoC: wm5102: Revert "ASoC: wm5102: Fix PM disable depth imbalance in wm5102_probe"
  x86/cpu: Restore AMD's DE_CFG MSR after resume
  net: tun: call napi_schedule_prep() to ensure we own a napi
  dmaengine: at_hdmac: Check return code of dma_async_device_register
  dmaengine: at_hdmac: Fix impossible condition
  dmaengine: at_hdmac: Don't allow CPU to reorder channel enable
  dmaengine: at_hdmac: Fix completion of unissued descriptor in case of errors
  dmaengine: at_hdmac: Don't start transactions at tx_submit level
  dmaengine: at_hdmac: Fix at_lli struct definition
  cert host tools: Stop complaining about deprecated OpenSSL functions
  can: j1939: j1939_send_one(): fix missing CAN header initialization
  udf: Fix a slab-out-of-bounds write bug in udf_find_entry()
  btrfs: selftests: fix wrong error check in btrfs_free_dummy_root()
  platform/x86: hp_wmi: Fix rfkill causing soft blocked wifi
  drm/i915/dmabuf: fix sg_table handling in map_dma_buf
  nilfs2: fix use-after-free bug of ns_writer on remount
  nilfs2: fix deadlock in nilfs_count_free_blocks()
  vmlinux.lds.h: Fix placement of '.data..decrypted' section
  ALSA: usb-audio: Add DSD support for Accuphase DAC-60
  ALSA: usb-audio: Add quirk entry for M-Audio Micro
  ALSA: hda: fix potential memleak in 'add_widget_node'
  ALSA: hda/ca0132: add quirk for EVGA Z390 DARK
  mmc: sdhci-tegra: Fix SDHCI_RESET_ALL for CQHCI
  mmc: sdhci-of-arasan: Fix SDHCI_RESET_ALL for CQHCI
  mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI
  MIPS: jump_label: Fix compat branch range check
  arm64: efi: Fix handling of misaligned runtime regions and drop warning
  riscv: process: fix kernel info leakage
  net: macvlan: fix memory leaks of macvlan_common_newlink
  ethernet: tundra: free irq when alloc ring failed in tsi108_open()
  net: mv643xx_eth: disable napi when init rxq or txq failed in mv643xx_eth_open()
  ethernet: s2io: disable napi when start nic failed in s2io_card_up()
  cxgb4vf: shut down the adapter when t4vf_update_port_info() failed in cxgb4vf_open()
  net: cxgb3_main: disable napi when bind qsets failed in cxgb_up()
  net: cpsw: disable napi in cpsw_ndo_open()
  net/mlx5: Allow async trigger completion execution on single CPU systems
  net: nixge: disable napi when enable interrupts failed in nixge_open()
  perf stat: Fix printing os->prefix in CSV metrics output
  drivers: net: xgene: disable napi when register irq failed in xgene_enet_open()
  dmaengine: mv_xor_v2: Fix a resource leak in mv_xor_v2_remove()
  dmaengine: pxa_dma: use platform_get_irq_optional
  tipc: fix the msg->req tlv len check in tipc_nl_compat_name_table_dump_header
  can: af_can: fix NULL pointer dereference in can_rx_register()
  ipv6: addrlabel: fix infoleak when sending struct ifaddrlblmsg to network
  drm/vc4: Fix missing platform_unregister_drivers() call in vc4_drm_register()
  hamradio: fix issue of dev reference count leakage in bpq_device_event()
  net: lapbether: fix issue of dev reference count leakage in lapbeth_device_event()
  capabilities: fix undefined behavior in bit shift for CAP_TO_MASK
  net: fman: Unregister ethernet device on removal
  bnxt_en: fix potentially incorrect return value for ndo_rx_flow_steer
  bnxt_en: Fix possible crash in bnxt_hwrm_set_coal()
  net: tun: Fix memory leaks of napi_get_frags
  net: gso: fix panic on frag_list with mixed head alloc types
  HID: hyperv: fix possible memory leak in mousevsc_probe()
  bpf, sockmap: Fix the sk->sk_forward_alloc warning of sk_stream_kill_queues
  wifi: cfg80211: fix memory leak in query_regdb_file()
  wifi: cfg80211: silence a sparse RCU warning
  phy: stm32: fix an error code in probe
  xfs: drain the buf delwri queue before xfsaild idles
  xfs: preserve inode versioning across remounts
  xfs: use MMAPLOCK around filemap_map_pages()
  xfs: redesign the reflink remap loop to fix blkres depletion crash
  xfs: rename xfs_bmap_is_real_extent to is_written_extent
  xfs: preserve rmapbt swapext block reservation from freed blocks
  ANDROID: properly copy the scm_io_uring field in struct sk_buff
  Linux 5.4.224
  ipc: remove memcg accounting for sops objects in do_semtimedop()
  wifi: brcmfmac: Fix potential buffer overflow in brcmf_fweh_event_worker()
  drm/i915/sdvo: Setup DDC fully before output init
  drm/i915/sdvo: Filter out invalid outputs more sensibly
  drm/rockchip: dsi: Force synchronous probe
  mtd: rawnand: gpmi: Set WAIT_FOR_READY timeout based on program/erase times
  KVM: x86: emulator: update the emulation mode after CR0 write
  KVM: x86: emulator: introduce emulator_recalc_and_set_mode
  KVM: x86: emulator: em_sysexit should update ctxt->mode
  KVM: x86: Mask off reserved bits in CPUID.80000008H
  KVM: x86: Mask off reserved bits in CPUID.8000001AH
  ext4: fix BUG_ON() when directory entry has invalid rec_len
  ext4: fix warning in 'ext4_da_release_space'
  parisc: Avoid printing the hardware path twice
  parisc: Export iosapic_serial_irq() symbol for serial port driver
  parisc: Make 8250_gsc driver dependend on CONFIG_PARISC
  ALSA: usb-audio: Add quirks for MacroSilicon MS2100/MS2106 devices
  perf/x86/intel: Add Cooper Lake stepping to isolation_ucodes[]
  perf/x86/intel: Fix pebs event constraints for ICL
  efi: random: reduce seed size to 32 bytes
  fuse: add file_modified() to fallocate
  capabilities: fix potential memleak on error path from vfs_getxattr_alloc()
  tracing/histogram: Update document for KEYS_MAX size
  tools/nolibc/string: Fix memcmp() implementation
  kprobe: reverse kp->flags when arm_kprobe failed
  tcp/udp: Make early_demux back namespacified.
  btrfs: fix type of parameter generation in btrfs_get_dentry
  binder: fix UAF of alloc->vma in race with munmap()
  memcg: enable accounting of ipc resources
  tcp/udp: Fix memory leak in ipv6_renew_options().
  block, bfq: protect 'bfqd->queued' by 'bfqd->lock'
  Bluetooth: L2CAP: Fix attempting to access uninitialized memory
  xfs: Add the missed xfs_perag_put() for xfs_ifree_cluster()
  xfs: don't fail unwritten extent conversion on writeback due to edquot
  xfs: group quota should return EDQUOT when prj quota enabled
  xfs: gut error handling in xfs_trans_unreserve_and_mod_sb()
  xfs: use ordered buffers to initialize dquot buffers during quotacheck
  xfs: don't fail verifier on empty attr3 leaf block
  i2c: xiic: Add platform module alias
  HID: saitek: add madcatz variant of MMO7 mouse device ID
  scsi: core: Restrict legal sdev_state transitions via sysfs
  media: meson: vdec: fix possible refcount leak in vdec_probe()
  media: dvb-frontends/drxk: initialize err to 0
  media: cros-ec-cec: limit msg.len to CEC_MAX_MSG_SIZE
  media: s5p_cec: limit msg.len to CEC_MAX_MSG_SIZE
  ipv6: fix WARNING in ip6_route_net_exit_late()
  net, neigh: Fix null-ptr-deref in neigh_table_clear()
  net: mdio: fix undefined behavior in bit shift for __mdiobus_register
  Bluetooth: L2CAP: fix use-after-free in l2cap_conn_del()
  Bluetooth: L2CAP: Fix use-after-free caused by l2cap_reassemble_sdu
  btrfs: fix ulist leaks in error paths of qgroup self tests
  btrfs: fix inode list leak during backref walking at find_parent_nodes()
  btrfs: fix inode list leak during backref walking at resolve_indirect_refs()
  isdn: mISDN: netjet: fix wrong check of device registration
  mISDN: fix possible memory leak in mISDN_register_device()
  rose: Fix NULL pointer dereference in rose_send_frame()
  ipvs: fix WARNING in ip_vs_app_net_cleanup()
  ipvs: fix WARNING in __ip_vs_cleanup_batch()
  ipvs: use explicitly signed chars
  netfilter: nf_tables: release flow rule object from commit path
  net: tun: fix bugs for oversize packet when napi frags enabled
  net: sched: Fix use after free in red_enqueue()
  ata: pata_legacy: fix pdc20230_set_piomode()
  net: fec: fix improper use of NETDEV_TX_BUSY
  nfc: nfcmrvl: Fix potential memory leak in nfcmrvl_i2c_nci_send()
  nfc: s3fwrn5: Fix potential memory leak in s3fwrn5_nci_send()
  RDMA/qedr: clean up work queue on failure in qedr_alloc_resources()
  RDMA/core: Fix null-ptr-deref in ib_core_cleanup()
  net: dsa: Fix possible memory leaks in dsa_loop_init()
  nfs4: Fix kmemleak when allocate slot failed
  NFSv4.1: We must always send RECLAIM_COMPLETE after a reboot
  NFSv4.1: Handle RECLAIM_COMPLETE trunking errors
  IB/hfi1: Correctly move list in sc_disable()
  RDMA/cma: Use output interface for net_dev check
  Linux 5.4.223
  can: rcar_canfd: rcar_canfd_handle_global_receive(): fix IRQ storm on global FIFO receive
  net: enetc: survive memory pressure without crashing
  net/mlx5: Fix possible use-after-free in async command interface
  net/mlx5e: Do not increment ESN when updating IPsec ESN state
  nh: fix scope used to find saddr when adding non gw nh
  net: ehea: fix possible memory leak in ehea_register_port()
  openvswitch: switch from WARN to pr_warn
  ALSA: aoa: Fix I2S device accounting
  ALSA: aoa: i2sbus: fix possible memory leak in i2sbus_add_dev()
  PM: domains: Fix handling of unavailable/disabled idle states
  net: ksz884x: fix missing pci_disable_device() on error in pcidev_init()
  i40e: Fix flow-type by setting GL_HASH_INSET registers
  i40e: Fix VF hang when reset is triggered on another VF
  i40e: Fix ethtool rx-flow-hash setting for X722
  media: videodev2.h: V4L2_DV_BT_BLANKING_HEIGHT should check 'interlaced'
  media: v4l2-dv-timings: add sanity checks for blanking values
  media: vivid: dev->bitmap_cap wasn't freed in all cases
  media: vivid: s_fbuf: add more sanity checks
  PM: hibernate: Allow hybrid sleep to work with s2idle
  can: mscan: mpc5xxx: mpc5xxx_can_probe(): add missing put_clock() in error path
  tcp: fix indefinite deferral of RTO with SACK reneging
  net: lantiq_etop: don't free skb when returning NETDEV_TX_BUSY
  net: fix UAF issue in nfqnl_nf_hook_drop() when ops_init() failed
  kcm: annotate data-races around kcm->rx_wait
  kcm: annotate data-races around kcm->rx_psock
  amd-xgbe: add the bit rate quirk for Molex cables
  amd-xgbe: fix the SFP compliance codes check for DAC cables
  x86/unwind/orc: Fix unreliable stack dump with gcov
  net: netsec: fix error handling in netsec_register_mdio()
  tipc: fix a null-ptr-deref in tipc_topsrv_accept
  ALSA: ac97: fix possible memory leak in snd_ac97_dev_register()
  arc: iounmap() arg is volatile
  drm/msm: Fix return type of mdp4_lvds_connector_mode_valid
  media: v4l2: Fix v4l2_i2c_subdev_set_name function documentation
  net: ieee802154: fix error return code in dgram_bind()
  mm,hugetlb: take hugetlb_lock before decrementing h->resv_huge_pages
  cgroup-v1: add disabled controller check in cgroup1_parse_param()
  xen/gntdev: Prevent leaking grants
  Xen/gntdev: don't ignore kernel unmapping error
  xfs: force the log after remapping a synchronous-writes file
  xfs: clear XFS_DQ_FREEING if we can't lock the dquot buffer to flush
  xfs: finish dfops on every insert range shift iteration
  s390/pci: add missing EX_TABLE entries to __pcistg_mio_inuser()/__pcilg_mio_inuser()
  s390/futex: add missing EX_TABLE entry to __futex_atomic_op()
  perf auxtrace: Fix address filter symbol name match for modules
  kernfs: fix use-after-free in __kernfs_remove
  mmc: core: Fix kernel panic when remove non-standard SDIO card
  drm/msm/hdmi: fix memory corruption with too many bridges
  drm/msm/dsi: fix memory corruption with too many bridges
  mac802154: Fix LQI recording
  fbdev: smscufx: Fix several use-after-free bugs
  iio: light: tsl2583: Fix module unloading
  tools: iio: iio_utils: fix digit calculation
  xhci: Remove device endpoints from bandwidth list when freeing the device
  mtd: rawnand: marvell: Use correct logic for nand-keep-config
  usb: xhci: add XHCI_SPURIOUS_SUCCESS to ASM1042 despite being a V0.96 controller
  usb: bdc: change state when port disconnected
  usb: dwc3: gadget: Don't set IMI for no_interrupt
  usb: dwc3: gadget: Stop processing more requests on IMI
  USB: add RESET_RESUME quirk for NVIDIA Jetson devices in RCM
  ALSA: au88x0: use explicitly signed char
  ALSA: Use del_timer_sync() before freeing timer
  can: kvaser_usb: Fix possible completions during init_completion
  can: j1939: transport: j1939_session_skb_drop_old(): spin_unlock_irqrestore() before kfree_skb()
  UPSTREAM: once: fix section mismatch on clang builds
  ANDROID: fix up struct sk_buf ABI breakage
  ANDROID: fix up CRC issue with struct tcp_sock
  Linux 5.4.222
  once: fix section mismatch on clang builds
  ANDROID: fix up 131287ff83 ("once: add DO_ONCE_SLOW() for sleepable contexts")
  Revert "serial: 8250: Fix restoring termios speed after suspend"
  Linux 5.4.221
  mm: /proc/pid/smaps_rollup: fix no vma's null-deref
  hv_netvsc: Fix race between VF offering and VF association message from host
  Makefile.debug: re-enable debug info for .S files
  ACPI: video: Force backlight native for more TongFang devices
  riscv: topology: fix default topology reporting
  arm64: topology: move store_cpu_topology() to shared code
  iommu/vt-d: Clean up si_domain in the init_dmars() error path
  net: hns: fix possible memory leak in hnae_ae_register()
  net: sched: cake: fix null pointer access issue when cake_init() fails
  net: phy: dp83867: Extend RX strap quirk for SGMII mode
  net/atm: fix proc_mpc_write incorrect return value
  HID: magicmouse: Do not set BTN_MOUSE on double report
  tipc: fix an information leak in tipc_topsrv_kern_subscr
  tipc: Fix recognition of trial period
  ACPI: extlog: Handle multiple records
  btrfs: fix processing of delayed tree block refs during backref walking
  btrfs: fix processing of delayed data refs during backref walking
  r8152: add PID for the Lenovo OneLink+ Dock
  arm64: errata: Remove AES hwcap for COMPAT tasks
  media: venus: dec: Handle the case where find_format fails
  KVM: arm64: vgic: Fix exit condition in scan_its_table()
  ata: ahci: Match EM_MAX_SLOTS with SATA_PMP_MAX_PORTS
  ata: ahci-imx: Fix MODULE_ALIAS
  hwmon/coretemp: Handle large core ID value
  x86/microcode/AMD: Apply the patch early on every logical thread
  ocfs2: fix BUG when iput after ocfs2_mknod fails
  ocfs2: clear dinode links count in case of error
  xfs: fix use-after-free on CIL context on shutdown
  xfs: move inode flush to the sync workqueue
  xfs: reflink should force the log out if mounted with wsync
  xfs: factor out a new xfs_log_force_inode helper
  xfs: trylock underlying buffer on dquot flush
  xfs: don't write a corrupt unmount record to force summary counter recalc
  xfs: tail updates only need to occur when LSN changes
  xfs: factor common AIL item deletion code
  xfs: Throttle commits on delayed background CIL push
  xfs: Lower CIL flush limit for large logs
  xfs: preserve default grace interval during quotacheck
  xfs: fix unmount hang and memory leak on shutdown during quotaoff
  xfs: factor out quotaoff intent AIL removal and memory free
  xfs: Replace function declaration by actual definition
  xfs: remove the xfs_qoff_logitem_t typedef
  xfs: remove the xfs_dq_logitem_t typedef
  xfs: remove the xfs_disk_dquot_t and xfs_dquot_t
  xfs: Use scnprintf() for avoiding potential buffer overflow
  xfs: check owner of dir3 blocks
  xfs: check owner of dir3 data blocks
  xfs: fix buffer corruption reporting when xfs_dir3_free_header_check fails
  xfs: xfs_buf_corruption_error should take __this_address
  xfs: add a function to deal with corrupt buffers post-verifiers
  xfs: rework collapse range into an atomic operation
  xfs: rework insert range into an atomic operation
  xfs: open code insert range extent split helper
  Linux 5.4.220
  thermal: intel_powerclamp: Use first online CPU as control_cpu
  inet: fully convert sk->sk_rx_dst to RCU rules
  efi: libstub: drop pointless get_memory_map() call
  md: Replace snprintf with scnprintf
  ext4: continue to expand file system when the target size doesn't reach
  net/ieee802154: don't warn zero-sized raw_sendmsg()
  Revert "net/ieee802154: reject zero-sized raw_sendmsg()"
  net: ieee802154: return -EINVAL for unknown addr type
  io_uring/af_unix: defer registered files gc to io_uring release
  perf intel-pt: Fix segfault in intel_pt_print_info() with uClibc
  clk: bcm2835: Make peripheral PLLC critical
  usb: idmouse: fix an uninit-value in idmouse_open
  nvmet-tcp: add bounds check on Transfer Tag
  nvme: copy firmware_rev on each init
  staging: rtl8723bs: fix a potential memory leak in rtw_init_cmd_priv()
  Revert "usb: storage: Add quirk for Samsung Fit flash"
  usb: musb: Fix musb_gadget.c rxstate overflow bug
  usb: host: xhci: Fix potential memory leak in xhci_alloc_stream_info()
  md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d
  HID: roccat: Fix use-after-free in roccat_read()
  bcache: fix set_at_max_writeback_rate() for multiple attached devices
  ata: libahci_platform: Sanity check the DT child nodes number
  staging: vt6655: fix potential memory leak
  power: supply: adp5061: fix out-of-bounds read in adp5061_get_chg_type()
  nbd: Fix hung when signal interrupts nbd_start_device_ioctl()
  scsi: 3w-9xxx: Avoid disabling device if failing to enable it
  clk: zynqmp: pll: rectify rate rounding in zynqmp_pll_round_rate
  media: cx88: Fix a null-ptr-deref bug in buffer_prepare()
  clk: zynqmp: Fix stack-out-of-bounds in strncpy`
  btrfs: scrub: try to fix super block errors
  ARM: dts: imx6sx: add missing properties for sram
  ARM: dts: imx6sll: add missing properties for sram
  ARM: dts: imx6sl: add missing properties for sram
  ARM: dts: imx6qp: add missing properties for sram
  ARM: dts: imx6dl: add missing properties for sram
  ARM: dts: imx6q: add missing properties for sram
  ARM: dts: imx7d-sdb: config the max pressure for tsc2046
  mmc: sdhci-msm: add compatible string check for sdm670
  drm/amdgpu: fix initial connector audio value
  platform/x86: msi-laptop: Change DMI match / alias strings to fix module autoloading
  drm: panel-orientation-quirks: Add quirk for Anbernic Win600
  drm/vc4: vec: Fix timings for VEC modes
  drm/amd/display: fix overflow on MIN_I64 definition
  drm: Prevent drm_copy_field() to attempt copying a NULL pointer
  drm: Use size_t type for len variable in drm_copy_field()
  drm/nouveau/nouveau_bo: fix potential memory leak in nouveau_bo_alloc()
  r8152: Rate limit overflow messages
  Bluetooth: L2CAP: Fix user-after-free
  net: If sock is dead don't access sock's sk_wq in sk_stream_wait_memory
  wifi: rt2x00: correctly set BBP register 86 for MT7620
  wifi: rt2x00: set SoC wmac clock register
  wifi: rt2x00: set VGC gain for both chains of MT7620
  wifi: rt2x00: set correct TX_SW_CFG1 MAC register for MT7620
  wifi: rt2x00: don't run Rt5592 IQ calibration on MT7620
  can: bcm: check the result of can_send() in bcm_can_tx()
  Bluetooth: hci_sysfs: Fix attempting to call device_add multiple times
  Bluetooth: L2CAP: initialize delayed works at l2cap_chan_create()
  wifi: brcmfmac: fix use-after-free bug in brcmf_netdev_start_xmit()
  xfrm: Update ipcomp_scratches with NULL when freed
  wifi: ath9k: avoid uninit memory read in ath9k_htc_rx_msg()
  tcp: annotate data-race around tcp_md5sig_pool_populated
  openvswitch: Fix overreporting of drops in dropwatch
  openvswitch: Fix double reporting of drops in dropwatch
  bpftool: Clear errno after libcap's checks
  wifi: brcmfmac: fix invalid address access when enabling SCAN log level
  NFSD: Return nfserr_serverfault if splice_ok but buf->pages have data
  thermal: intel_powerclamp: Use get_cpu() instead of smp_processor_id() to avoid crash
  powercap: intel_rapl: fix UBSAN shift-out-of-bounds issue
  MIPS: BCM47XX: Cast memcmp() of function to (void *)
  ACPI: video: Add Toshiba Satellite/Portege Z830 quirk
  f2fs: fix race condition on setting FI_NO_EXTENT flag
  crypto: cavium - prevent integer overflow loading firmware
  kbuild: remove the target in signal traps when interrupted
  iommu/iova: Fix module config properly
  crypto: ccp - Release dma channels before dmaengine unrgister
  crypto: akcipher - default implementation for setting a private key
  iommu/omap: Fix buffer overflow in debugfs
  cgroup/cpuset: Enable update_tasks_cpumask() on top_cpuset
  powerpc: Fix SPE Power ISA properties for e500v1 platforms
  powerpc/64s: Fix GENERIC_CPU build flags for PPC970 / G5
  x86/hyperv: Fix 'struct hv_enlightened_vmcs' definition
  powerpc/powernv: add missing of_node_put() in opal_export_attrs()
  powerpc/pci_dn: Add missing of_node_put()
  powerpc/sysdev/fsl_msi: Add missing of_node_put()
  powerpc/math_emu/efp: Include module.h
  mailbox: bcm-ferxrm-mailbox: Fix error check for dma_map_sg
  clk: ast2600: BCLK comes from EPLL
  clk: ti: dra7-atl: Fix reference leak in of_dra7_atl_clk_probe
  clk: bcm2835: fix bcm2835_clock_rate_from_divisor declaration
  spmi: pmic-arb: correct duplicate APID to PPID mapping logic
  dmaengine: ioat: stop mod_timer from resurrecting deleted timer in __cleanup()
  clk: mediatek: mt8183: mfgcfg: Propagate rate changes to parent
  mfd: sm501: Add check for platform_driver_register()
  mfd: fsl-imx25: Fix check for platform_get_irq() errors
  mfd: lp8788: Fix an error handling path in lp8788_irq_init() and lp8788_irq_init()
  mfd: lp8788: Fix an error handling path in lp8788_probe()
  mfd: fsl-imx25: Fix an error handling path in mx25_tsadc_setup_irq()
  mfd: intel_soc_pmic: Fix an error handling path in intel_soc_pmic_i2c_probe()
  fsi: core: Check error number after calling ida_simple_get
  scsi: libsas: Fix use-after-free bug in smp_execute_task_sg()
  serial: 8250: Fix restoring termios speed after suspend
  firmware: google: Test spinlock on panic path to avoid lockups
  staging: vt6655: fix some erroneous memory clean-up loops
  phy: qualcomm: call clk_disable_unprepare in the error handling
  tty: serial: fsl_lpuart: disable dma rx/tx use flags in lpuart_dma_shutdown
  drivers: serial: jsm: fix some leaks in probe
  usb: gadget: function: fix dangling pnp_string in f_printer.c
  xhci: Don't show warning for reinit on known broken suspend
  md/raid5: Ensure stripe_fill happens on non-read IO with journal
  mtd: rawnand: meson: fix bit map use in meson_nfc_ecc_correct()
  ata: fix ata_id_has_dipm()
  ata: fix ata_id_has_ncq_autosense()
  ata: fix ata_id_has_devslp()
  ata: fix ata_id_sense_reporting_enabled() and ata_id_has_sense_reporting()
  RDMA/siw: Always consume all skbuf data in sk_data_ready() upcall.
  mtd: devices: docg3: check the return value of devm_ioremap() in the probe
  dyndbg: let query-modname override actual module name
  dyndbg: fix module.dyndbg handling
  misc: ocxl: fix possible refcount leak in afu_ioctl()
  RDMA/rxe: Fix the error caused by qp->sk
  RDMA/rxe: Fix "kernel NULL pointer dereference" error
  media: xilinx: vipp: Fix refcount leak in xvip_graph_dma_init
  tty: xilinx_uartps: Fix the ignore_status
  media: exynos4-is: fimc-is: Add of_node_put() when breaking out of loop
  HSI: omap_ssi_port: Fix dma_map_sg error check
  HSI: omap_ssi: Fix refcount leak in ssi_probe
  clk: tegra20: Fix refcount leak in tegra20_clock_init
  clk: tegra: Fix refcount leak in tegra114_clock_init
  clk: tegra: Fix refcount leak in tegra210_clock_init
  clk: berlin: Add of_node_put() for of_get_parent()
  clk: oxnas: Hold reference returned by of_get_parent()
  clk: meson: Hold reference returned by of_get_parent()
  iio: ABI: Fix wrong format of differential capacitance channel ABI.
  iio: inkern: only release the device node when done with it
  iio: adc: at91-sama5d2_adc: lock around oversampling and sample freq
  iio: adc: at91-sama5d2_adc: check return status for pressure and touch
  iio: adc: at91-sama5d2_adc: fix AT91_SAMA5D2_MR_TRACKTIM_MAX
  ARM: dts: exynos: fix polarity of VBUS GPIO of Origen
  ARM: Drop CMDLINE_* dependency on ATAGS
  ARM: dts: exynos: correct s5k6a3 reset polarity on Midas family
  ARM: dts: kirkwood: lsxl: remove first ethernet port
  ARM: dts: kirkwood: lsxl: fix serial line
  ARM: dts: turris-omnia: Fix mpp26 pin name and comment
  soc: qcom: smem_state: Add refcounting for the 'state->of_node'
  soc: qcom: smsm: Fix refcount leak bugs in qcom_smsm_probe()
  memory: of: Fix refcount leak bug in of_get_ddr_timings()
  memory: pl353-smc: Fix refcount leak bug in pl353_smc_probe()
  ALSA: hda/hdmi: Don't skip notification handling during PM operation
  ASoC: wm5102: Fix PM disable depth imbalance in wm5102_probe
  ASoC: wm5110: Fix PM disable depth imbalance in wm5110_probe
  ASoC: wm8997: Fix PM disable depth imbalance in wm8997_probe
  mmc: wmt-sdmmc: Fix an error handling path in wmt_mci_probe()
  ALSA: dmaengine: increment buffer pointer atomically
  drm/msm/dpu: index dpu_kms->hw_vbif using vbif_idx
  ASoC: eureka-tlv320: Hold reference returned from of_find_xxx API
  mmc: au1xmmc: Fix an error handling path in au1xmmc_probe()
  drm/omap: dss: Fix refcount leak bugs
  ALSA: hda: beep: Simplify keep-power-at-enable behavior
  ASoC: rsnd: Add check for rsnd_mod_power_on
  drm/bridge: megachips: Fix a null pointer dereference bug
  drm: fix drm_mipi_dbi build errors
  platform/x86: msi-laptop: Fix resource cleanup
  platform/x86: msi-laptop: Fix old-ec check for backlight registering
  platform/chrome: fix memory corruption in ioctl
  platform/chrome: fix double-free in chromeos_laptop_prepare()
  drm/mipi-dsi: Detach devices when removing the host
  drm: bridge: adv7511: fix CEC power down control register offset
  net: mvpp2: fix mvpp2 debugfs leak
  once: add DO_ONCE_SLOW() for sleepable contexts
  net/ieee802154: reject zero-sized raw_sendmsg()
  bnx2x: fix potential memory leak in bnx2x_tpa_stop()
  net: rds: don't hold sock lock when cancelling work from rds_tcp_reset_callbacks()
  tcp: fix tcp_cwnd_validate() to not forget is_cwnd_limited
  sctp: handle the error returned from sctp_auth_asoc_init_active_key
  mISDN: fix use-after-free bugs in l1oip timer handlers
  vhost/vsock: Use kvmalloc/kvfree for larger packets.
  spi: s3c64xx: Fix large transfers with DMA
  netfilter: nft_fib: Fix for rpath check with VRF devices
  spi/omap100k:Fix PM disable depth imbalance in omap1_spi100k_probe
  x86/microcode/AMD: Track patch allocation size explicitly
  bpf: Ensure correct locking around vulnerable function find_vpid()
  net: fs_enet: Fix wrong check in do_pd_setup
  wifi: rtl8xxxu: gen2: Fix mistake in path B IQ calibration
  bpf: btf: fix truncated last_member_type_id in btf_struct_resolve
  wifi: rtl8xxxu: Fix skb misuse in TX queue selection
  spi: qup: add missing clk_disable_unprepare on error in spi_qup_pm_resume_runtime()
  spi: qup: add missing clk_disable_unprepare on error in spi_qup_resume()
  wifi: rtl8xxxu: tighten bounds checking in rtl8xxxu_read_efuse()
  x86/resctrl: Fix to restore to original value when re-enabling hardware prefetch register
  bpftool: Fix a wrong type cast in btf_dumper_int
  wifi: mac80211: allow bw change during channel switch in mesh
  wifi: ath10k: add peer map clean up for peer delete in ath10k_sta_state()
  nfsd: Fix a memory leak in an error handling path
  ARM: 9247/1: mm: set readonly for MT_MEMORY_RO with ARM_LPAE
  sh: machvec: Use char[] for section boundaries
  userfaultfd: open userfaultfds with O_RDONLY
  tracing: Disable interrupt or preemption before acquiring arch_spinlock_t
  selinux: use "grep -E" instead of "egrep"
  drm/nouveau: fix a use-after-free in nouveau_gem_prime_import_sg_table()
  gcov: support GCC 12.1 and newer compilers
  KVM: VMX: Drop bits 31:16 when shoving exception error code into VMCS
  KVM: nVMX: Unconditionally purge queued/injected events on nested "exit"
  KVM: x86/emulator: Fix handing of POP SS to correctly set interruptibility
  media: cedrus: Set the platform driver data earlier
  ring-buffer: Fix race between reset page and reading page
  ring-buffer: Check pending waiters when doing wake ups as well
  ring-buffer: Have the shortest_full queue be the shortest not longest
  ring-buffer: Allow splice to read previous partially read pages
  ftrace: Properly unset FTRACE_HASH_FL_MOD
  livepatch: fix race between fork and KLP transition
  ext4: place buffer head allocation before handle start
  ext4: make ext4_lazyinit_thread freezable
  ext4: fix null-ptr-deref in ext4_write_info
  ext4: avoid crash when inline data creation follows DIO write
  jbd2: wake up journal waiters in FIFO order, not LIFO
  nilfs2: fix use-after-free bug of struct nilfs_root
  f2fs: fix to do sanity check on summary info
  f2fs: fix to do sanity check on destination blkaddr during recovery
  f2fs: increase the limit for reserve_root
  btrfs: fix race between quota enable and quota rescan ioctl
  fbdev: smscufx: Fix use-after-free in ufx_ops_open()
  powerpc/boot: Explicitly disable usage of SPE instructions
  PCI: Sanitise firmware BAR assignments behind a PCI-PCI bridge
  UM: cpuinfo: Fix a warning for CONFIG_CPUMASK_OFFSTACK
  riscv: Pass -mno-relax only on lld < 15.0.0
  riscv: Allow PROT_WRITE-only mmap()
  parisc: fbdev/stifb: Align graphics memory size to 4MB
  RISC-V: Make port I/O string accessors actually work
  regulator: qcom_rpm: Fix circular deferral regression
  ASoC: wcd9335: fix order of Slimbus unprepare/disable
  quota: Check next/prev free block number after reading from quota file
  HID: multitouch: Add memory barriers
  fs: dlm: handle -EBUSY first in lock arg validation
  fs: dlm: fix race between test_bit() and queue_work()
  mmc: sdhci-sprd: Fix minimum clock limit
  can: kvaser_usb_leaf: Fix CAN state after restart
  can: kvaser_usb_leaf: Fix TX queue out of sync after restart
  can: kvaser_usb_leaf: Fix overread with an invalid command
  can: kvaser_usb: Fix use of uninitialized completion
  usb: add quirks for Lenovo OneLink+ Dock
  iio: pressure: dps310: Reset chip after timeout
  iio: pressure: dps310: Refactor startup procedure
  iio: dac: ad5593r: Fix i2c read protocol requirements
  cifs: Fix the error length of VALIDATE_NEGOTIATE_INFO message
  cifs: destage dirty pages before re-reading them for cache=none
  mtd: rawnand: atmel: Unmap streaming DMA mappings
  ALSA: hda/realtek: Add Intel Reference SSID to support headset keys
  ALSA: hda/realtek: Add quirk for ASUS GV601R laptop
  ALSA: hda/realtek: Correct pin configs for ASUS G533Z
  ALSA: hda/realtek: remove ALC289_FIXUP_DUAL_SPK for Dell 5530
  ALSA: usb-audio: Fix NULL dererence at error path
  ALSA: usb-audio: Fix potential memory leaks
  ALSA: rawmidi: Drop register_mutex in snd_rawmidi_free()
  ALSA: oss: Fix potential deadlock at unregistration

 Conflicts:
	drivers/android/binder.c
	include/linux/rmap.h
	kernel/cgroup/cpuset.c
	mm/rmap.c

Change-Id: I34fe2d5c9b0d5844560de9c983867511b5d57265
2023-02-08 18:28:38 +02:00

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	INET		An implementation of the TCP/IP protocol suite for the LINUX
 *			operating system.  INET is implemented using the BSD Socket
 *			interface as the means of communication with the user level.
 *
 *			Implementation of the Transmission Control Protocol(TCP).
 *
 *			IPv4 specific functions
 *
 *			code split from:
 *			linux/ipv4/tcp.c
 *			linux/ipv4/tcp_input.c
 *			linux/ipv4/tcp_output.c
 *
 *			See tcp.c for author information
 */

/*
 * Changes:
 *	David S. Miller		:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *	David S. Miller		:	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *	Andi Kleen		:	Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *	Andi Kleen		:	Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *	Mike McLagan		:	Routing by source
 *	Juan Jose Ciarlante	:	ip_dynaddr bits
 *	Andi Kleen		:	various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt
#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
#include <crypto/hash.h>
#include <linux/scatterlist.h>
#include <trace/events/tcp.h>
#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
	const struct inet_timewait_sock *tw = inet_twsk(sktw);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	if (reuse == 2) {
		/* Still does not detect *everything* that goes through
		 * lo, since we require a loopback src or dst address
		 * or direct binding to 'lo' interface.
		 */
		bool loopback = false;

		if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
			loopback = true;
#if IS_ENABLED(CONFIG_IPV6)
		if (tw->tw_family == AF_INET6) {
			if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
			     (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
			    ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
			     (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
				loopback = true;
		} else
#endif
		{
			if (ipv4_is_loopback(tw->tw_daddr) ||
			    ipv4_is_loopback(tw->tw_rcv_saddr))
				loopback = true;
		}
		if (!loopback)
			reuse = 0;
	}

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (reuse && time_after32(ktime_get_seconds(),
					    tcptw->tw_ts_recent_stamp)))) {
		/* In case of repair and re-using TIME-WAIT sockets we still
		 * want to be sure that it is safe as above but honor the
		 * sequence numbers and time stamps set as part of the repair
		 * process.
		 *
		 * Without this check re-using a TIME-WAIT socket with TCP
		 * repair would accumulate a -1 on the repair assigned
		 * sequence number. The first time it is reused the sequence
		 * is -1, the second time -2, etc. This fixes that issue
		 * without appearing to create any others.
		 */
		if (likely(!tp->repair)) {
			u32 seq = tcptw->tw_snd_nxt + 65535 + 2;

			if (!seq)
				seq = 1;
			WRITE_ONCE(tp->write_seq, seq);
			tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		}
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
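
tcp_twsk_unique() is driven by the net.ipv4.tcp_tw_reuse sysctl: 0 disables reuse of TIME-WAIT sockets for new outgoing connections, 1 allows it when the timestamp check above passes, and 2 (the reuse == 2 branch) restricts reuse to loopback traffic. A minimal userspace sketch for inspecting the current setting; the procfs path is the standard one, the rest is illustrative only:

#include <stdio.h>

/* Print the current TIME-WAIT reuse policy: 0 = off, 1 = on, 2 = loopback only. */
int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_tw_reuse", "r");
	int val;

	if (!f) {
		perror("open tcp_tw_reuse");
		return 1;
	}
	if (fscanf(f, "%d", &val) == 1)
		printf("net.ipv4.tcp_tw_reuse = %d\n", val);
	fclose(f);
	return 0;
}
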
static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v4_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
}
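
BPF_CGROUP_RUN_PROG_INET4_CONNECT() gives a cgroup-attached BPF program a chance to inspect, rewrite, or reject the destination address before tcp_v4_connect() runs. A minimal sketch of such a program, assuming a libbpf-style build; the program name and the port-25 filter are purely illustrative:

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical cgroup/connect4 program: reject TCP connects to port 25. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup/connect4")
int deny_smtp_connect(struct bpf_sock_addr *ctx)
{
	/* user_ip4/user_port hold the address passed to connect(), in network byte order. */
	if (ctx->user_port == bpf_htons(25))
		return 0;	/* verdict 0: connect() fails with -EPERM */
	return 1;		/* verdict 1: allow */
}

char _license[] SEC("license") = "GPL";
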
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			WRITE_ONCE(tp->write_seq, 0);
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcp_seq(inet->inet_saddr,
						  inet->inet_daddr,
						  inet->inet_sport,
						  usin->sin_port));
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = prandom_u32();

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
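
Everything above is driven by an ordinary connect() on an AF_INET stream socket: the kernel resolves the route, picks the source address and port, hashes the socket, and sends the SYN via tcp_connect(). A minimal userspace counterpart; the destination address and port are placeholders:

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in dst;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_port = htons(80);				/* placeholder port */
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);		/* placeholder address */

	/* This call ends up in tcp_v4_connect() on the kernel side. */
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("connect");
	close(fd);
	return 0;
}
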
/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);
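
Whether this path shrinks the MSS depends on the socket's Path MTU discovery mode (inet->pmtudisc above), which applications control through the IP_MTU_DISCOVER socket option; on a connected socket the currently cached path MTU can be read back with IP_MTU. A small sketch under the assumption that fd is an already-connected AF_INET TCP socket:

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

/* Assumes 'fd' is a connected AF_INET TCP socket. */
static void show_pmtu(int fd)
{
	int mode = IP_PMTUDISC_DO;	/* always set DF, honour ICMP_FRAG_NEEDED */
	int mtu;
	socklen_t len = sizeof(mtu);

	if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &mode, sizeof(mode)) < 0)
		perror("IP_MTU_DISCOVER");
	if (getsockopt(fd, IPPROTO_IP, IP_MTU, &mtu, &len) == 0)
		printf("current path MTU: %d\n", mtu);
	else
		perror("IP_MTU");
}
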
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
* be closed and the error returned to the user. If err > 0
* it's just the icmp type << 8 | icmp code. After adjustment
* header points to the first 8 bytes of the tcp header. We need
* to find the appropriate port.
*
* The locking strategy used here is very "optimistic". When
* someone else accesses the socket the ICMP is just dropped
* and for some paths there is no check at all.
* A more general error queue to queue errors for later handling
* is probably better.
*
*/
int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
struct inet_connection_sock *icsk;
struct tcp_sock *tp;
struct inet_sock *inet;
const int type = icmp_hdr(icmp_skb)->type;
const int code = icmp_hdr(icmp_skb)->code;
struct sock *sk;
struct sk_buff *skb;
struct request_sock *fastopen;
u32 seq, snd_una;
s32 remaining;
u32 delta_us;
int err;
struct net *net = dev_net(icmp_skb->dev);
sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
th->dest, iph->saddr, ntohs(th->source),
inet_iif(icmp_skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return -ENOENT;
}
if (sk->sk_state == TCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
return 0;
}
seq = ntohl(th->seq);
if (sk->sk_state == TCP_NEW_SYN_RECV) {
tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
type == ICMP_TIME_EXCEEDED ||
(type == ICMP_DEST_UNREACH &&
(code == ICMP_NET_UNREACH ||
code == ICMP_HOST_UNREACH)));
return 0;
}
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
* servers this needs to be solved differently.
* We do take care of the PMTU discovery (RFC 1191) special case:
* we can receive locally generated ICMP messages while the socket is held.
*/
if (sock_owned_by_user(sk)) {
if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
}
if (sk->sk_state == TCP_CLOSE)
goto out;
if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto out;
}
icsk = inet_csk(sk);
tp = tcp_sk(sk);
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
fastopen = rcu_dereference(tp->fastopen_rsk);
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
switch (type) {
case ICMP_REDIRECT:
if (!sock_owned_by_user(sk))
do_redirect(icmp_skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
goto out;
case ICMP_PARAMETERPROB:
err = EPROTO;
break;
case ICMP_DEST_UNREACH:
if (code > NR_ICMP_UNREACH)
goto out;
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
/* We are not interested in TCP_LISTEN and open_requests
* (SYN-ACKs sent out by Linux are always < 576 bytes, so
* they should go through unfragmented).
*/
if (sk->sk_state == TCP_LISTEN)
goto out;
WRITE_ONCE(tp->mtu_info, info);
if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
} else {
if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
sock_hold(sk);
}
goto out;
}
err = icmp_err_convert[code].errno;
/* Check whether icmp_skb allows reverting the backoff
* (see draft-zimmermann-tcp-lcd) */
if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
break;
if (seq != tp->snd_una || !icsk->icsk_retransmits ||
!icsk->icsk_backoff || fastopen)
break;
if (sock_owned_by_user(sk))
break;
skb = tcp_rtx_queue_head(sk);
if (WARN_ON_ONCE(!skb))
break;
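/* Revert one exponential-backoff step and recompute the RTO;
* then either re-arm the retransmission timer with the remaining
* time or retransmit immediately if it has already expired.
*/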
icsk->icsk_backoff--;
icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
TCP_TIMEOUT_INIT;
icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
tcp_mstamp_refresh(tp);
delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
remaining = icsk->icsk_rto -
usecs_to_jiffies(delta_us);
if (remaining > 0) {
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
remaining, TCP_RTO_MAX);
} else {
/* RTO revert clocked out retransmission.
* Will retransmit now */
tcp_retransmit_timer(sk);
}
break;
case ICMP_TIME_EXCEEDED:
err = EHOSTUNREACH;
break;
default:
goto out;
}
switch (sk->sk_state) {
case TCP_SYN_SENT:
case TCP_SYN_RECV:
/* Only in fast or simultaneous open. If a fast open socket
* is already accepted it is treated as a connected one below.
*/
if (fastopen && !fastopen->sk)
break;
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
sk->sk_error_report(sk);
tcp_done(sk);
} else {
sk->sk_err_soft = err;
}
goto out;
}
/* If we've already connected we will keep trying
* until we time out, or the user gives up.
*
* rfc1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH
* to be considered hard errors (well, FRAG_FAILED too,
* but it is obsoleted by pmtu discovery).
*
* Note that in the modern internet, where routing is unreliable
* and broken firewalls sit in every dark corner sending random
* errors ordered by their masters, even these two messages finally lose
* their original sense (even Linux sends invalid PORT_UNREACHs)
*
* Now we are in compliance with RFCs.
* --ANK (980905)
*/
inet = inet_sk(sk);
if (!sock_owned_by_user(sk) && inet->recverr) {
sk->sk_err = err;
sk->sk_error_report(sk);
} else { /* Only an error on timeout */
sk->sk_err_soft = err;
}
out:
bh_unlock_sock(sk);
sock_put(sk);
return 0;
}
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct tcphdr *th = tcp_hdr(skb);
th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
}
/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
const struct inet_sock *inet = inet_sk(sk);
__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);
/*
* This routine will send an RST to the other tcp.
*
* Someone asks: why do I NEVER use socket parameters (TOS, TTL, etc.)
* for the reset?
* Answer: if a packet caused the RST, it is not for a socket
* existing in our system; if it is matched to a socket,
* it is just a duplicate segment or a bug in the other side's TCP.
* So we build the reply based only on parameters
* that arrived with the segment.
* Exception: precedence violation. We do not implement it in any case.
*/
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
} rep;
struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key = NULL;
const __u8 *hash_location = NULL;
unsigned char newhash[16];
int genhash;
struct sock *sk1 = NULL;
#endif
u64 transmit_time = 0;
struct sock *ctl_sk;
struct net *net;
/* Never send a reset in response to a reset. */
if (th->rst)
return;
/* If sk is not NULL, it means we did a successful lookup and the incoming
* route had to be correct. prequeue might have dropped our dst.
*/
if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
return;
/* Swap the send and the receive. */
memset(&rep, 0, sizeof(rep));
rep.th.dest = th->source;
rep.th.source = th->dest;
rep.th.doff = sizeof(struct tcphdr) / 4;
rep.th.rst = 1;
if (th->ack) {
rep.th.seq = th->ack_seq;
} else {
rep.th.ack = 1;
rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
skb->len - (th->doff << 2));
}
memset(&arg, 0, sizeof(arg));
arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th);
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
hash_location = tcp_parse_md5sig_option(th);
if (sk && sk_fullsock(sk)) {
key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
&ip_hdr(skb)->saddr, AF_INET);
} else if (hash_location) {
/*
* The active side is lost. Try to find the listening socket through
* the source port, and then find the md5 key through the listening socket.
* We do not lose security here:
* the incoming packet is checked against the md5 hash of the found key,
* and no RST is generated if the md5 hash doesn't match.
*/
sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
ntohs(th->source), inet_iif(skb),
tcp_v4_sdif(skb));
/* Don't send an RST if we can't find the key */
if (!sk1)
goto out;
key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
&ip_hdr(skb)->saddr, AF_INET);
if (!key)
goto out;
genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0)
goto out;
}
if (key) {
rep.opt[0] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) |
TCPOLEN_MD5SIG);
/* Update the reply length and the header length (doff) the TCP header advertises */
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len / 4;
tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
key, ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, &rep.th);
}
#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
/* When the socket is gone, all binding information is lost and
* routing might fail in this case. No choice here: if we choose to force
* the input interface, we will misroute in case of an asymmetric route.
*/
if (sk) {
arg.bound_dev_if = sk->sk_bound_dev_if;
if (sk_fullsock(sk))
trace_tcp_send_reset(sk, skb);
}
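/* sk can also be a timewait socket here; the assertion below
* guarantees tw_bound_dev_if aliases sk_bound_dev_if, so the
* read above is valid in both cases.
*/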
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
arg.tos = ip_hdr(skb)->tos;
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
if (sk) {
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk);
}
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
transmit_time);
ctl_sk->sk_mark = 0;
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
local_bh_enable();
#ifdef CONFIG_TCP_MD5SIG
out:
rcu_read_unlock();
#endif
}
/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
outside of socket context, is certainly ugly. What can I do?
*/
static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
+ (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
];
} rep;
struct net *net = sock_net(sk);
struct ip_reply_arg arg;
struct sock *ctl_sk;
u64 transmit_time;
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th);
if (tsecr) {
rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
rep.opt[1] = htonl(tsval);
rep.opt[2] = htonl(tsecr);
arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
}
/* Swap the send and the receive. */
rep.th.dest = th->source;
rep.th.source = th->dest;
rep.th.doff = arg.iov[0].iov_len / 4;
rep.th.seq = htonl(seq);
rep.th.ack_seq = htonl(ack);
rep.th.ack = 1;
rep.th.window = htons(win);
#ifdef CONFIG_TCP_MD5SIG
if (key) {
int offset = (tsecr) ? 3 : 0;
rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) |
TCPOLEN_MD5SIG);
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len/4;
tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
key, ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr, &rep.th);
}
#endif
arg.flags = reply_flags;
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
if (oif)
arg.bound_dev_if = oif;
arg.tos = tos;
arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
transmit_time);
ctl_sk->sk_mark = 0;
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
local_bh_enable();
}
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
tcp_v4_send_ack(sk, skb,
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw),
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
tw->tw_tos
);
inet_twsk_put(tw);
}
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
tcp_sk(sk)->snd_nxt;
/* RFC 7323 2.3
* The window field (SEG.WND) of every outgoing segment, with the
* exception of <SYN> segments, MUST be right-shifted by
* Rcv.Wind.Shift bits:
*/
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
}
/*
* Send a SYN-ACK after having received a SYN.
* This still operates on a request_sock only, not on a big
* socket.
*/
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
int err = -1;
struct sk_buff *skb;
/* First, grab a route. */
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
skb = tcp_make_synack(sk, dst, req, foc, synack_type);
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
rcu_read_lock();
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
rcu_dereference(ireq->ireq_opt));
rcu_read_unlock();
err = net_xmit_eval(err);
}
return err;
}
/*
* IPv4 request_sock destructor.
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
#ifdef CONFIG_TCP_MD5SIG
/*
* RFC2385 MD5 checksumming requires a mapping of
* IP address->MD5 Key.
* We need to maintain these in the sk structure.
*/
DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);
/* Find the Key structure for an address. */
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
const union tcp_md5_addr *addr,
int family)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
const struct tcp_md5sig_info *md5sig;
__be32 mask;
struct tcp_md5sig_key *best_match = NULL;
bool match;
/* caller either holds rcu_read_lock() or socket lock */
md5sig = rcu_dereference_check(tp->md5sig_info,
lockdep_sock_is_held(sk));
if (!md5sig)
return NULL;
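/* Longest-prefix match: of all keys whose prefix covers the address,
* return the most specific one.
*/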
hlist_for_each_entry_rcu(key, &md5sig->head, node) {
if (key->family != family)
continue;
if (family == AF_INET) {
mask = inet_make_mask(key->prefixlen);
match = (key->addr.a4.s_addr & mask) ==
(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
} else if (family == AF_INET6) {
match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
key->prefixlen);
#endif
} else {
match = false;
}
if (match && (!best_match ||
key->prefixlen > best_match->prefixlen))
best_match = key;
}
return best_match;
}
EXPORT_SYMBOL(__tcp_md5_do_lookup);
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
const union tcp_md5_addr *addr,
int family, u8 prefixlen)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
unsigned int size = sizeof(struct in_addr);
const struct tcp_md5sig_info *md5sig;
/* caller either holds rcu_read_lock() or socket lock */
md5sig = rcu_dereference_check(tp->md5sig_info,
lockdep_sock_is_held(sk));
if (!md5sig)
return NULL;
#if IS_ENABLED(CONFIG_IPV6)
if (family == AF_INET6)
size = sizeof(struct in6_addr);
#endif
hlist_for_each_entry_rcu(key, &md5sig->head, node) {
if (key->family != family)
continue;
if (!memcmp(&key->addr, addr, size) &&
key->prefixlen == prefixlen)
return key;
}
return NULL;
}
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
const union tcp_md5_addr *addr;
addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);
/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
gfp_t gfp)
{
/* Add Key to the list */
struct tcp_md5sig_key *key;
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
if (key) {
/* Pre-existing entry - just update that one.
* Note that the key might be used concurrently.
*/
memcpy(key->key, newkey, newkeylen);
/* Pairs with READ_ONCE() in tcp_md5_hash_key().
* Also note that a reader could catch the new key->keylen value
* but the old key->key[]; this is the reason we use __GFP_ZERO
* at sock_kmalloc() time below these lines.
*/
WRITE_ONCE(key->keylen, newkeylen);
return 0;
}
md5sig = rcu_dereference_protected(tp->md5sig_info,
lockdep_sock_is_held(sk));
if (!md5sig) {
md5sig = kmalloc(sizeof(*md5sig), gfp);
if (!md5sig)
return -ENOMEM;
sk_nocaps_add(sk, NETIF_F_GSO_MASK);
INIT_HLIST_HEAD(&md5sig->head);
rcu_assign_pointer(tp->md5sig_info, md5sig);
}
key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO);
if (!key)
return -ENOMEM;
if (!tcp_alloc_md5sig_pool()) {
sock_kfree_s(sk, key, sizeof(*key));
return -ENOMEM;
}
memcpy(key->key, newkey, newkeylen);
key->keylen = newkeylen;
key->family = family;
key->prefixlen = prefixlen;
memcpy(&key->addr, addr,
(family == AF_INET6) ? sizeof(struct in6_addr) :
sizeof(struct in_addr));
hlist_add_head_rcu(&key->node, &md5sig->head);
return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
u8 prefixlen)
{
struct tcp_md5sig_key *key;
key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
kfree_rcu(key, rcu);
return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);
static void tcp_clear_md5_list(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
struct hlist_node *n;
struct tcp_md5sig_info *md5sig;
md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
hlist_del_rcu(&key->node);
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
kfree_rcu(key, rcu);
}
}
static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
char __user *optval, int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
u8 prefixlen = 32;
if (optlen < sizeof(cmd))
return -EINVAL;
if (copy_from_user(&cmd, optval, sizeof(cmd)))
return -EFAULT;
if (sin->sin_family != AF_INET)
return -EINVAL;
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
if (prefixlen > 32)
return -EINVAL;
}
if (!cmd.tcpm_keylen)
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
AF_INET, prefixlen);
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
GFP_KERNEL);
}
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
__be32 daddr, __be32 saddr,
const struct tcphdr *th, int nbytes)
{
struct tcp4_pseudohdr *bp;
struct scatterlist sg;
struct tcphdr *_th;
bp = hp->scratch;
bp->saddr = saddr;
bp->daddr = daddr;
bp->pad = 0;
bp->protocol = IPPROTO_TCP;
bp->len = cpu_to_be16(nbytes);
_th = (struct tcphdr *)(bp + 1);
memcpy(_th, th, sizeof(*th));
_th->check = 0;
sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
ahash_request_set_crypt(hp->md5_req, &sg, NULL,
sizeof(*bp) + sizeof(*th));
return crypto_ahash_update(hp->md5_req);
}
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
__be32 daddr, __be32 saddr, const struct tcphdr *th)
{
struct tcp_md5sig_pool *hp;
struct ahash_request *req;
hp = tcp_get_md5sig_pool();
if (!hp)
goto clear_hash_noput;
req = hp->md5_req;
if (crypto_ahash_init(req))
goto clear_hash;
if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
ahash_request_set_crypt(req, NULL, md5_hash, 0);
if (crypto_ahash_final(req))
goto clear_hash;
tcp_put_md5sig_pool();
return 0;
clear_hash:
tcp_put_md5sig_pool();
clear_hash_noput:
memset(md5_hash, 0, 16);
return 1;
}
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk,
const struct sk_buff *skb)
{
struct tcp_md5sig_pool *hp;
struct ahash_request *req;
const struct tcphdr *th = tcp_hdr(skb);
__be32 saddr, daddr;
if (sk) { /* valid for establish/request sockets */
saddr = sk->sk_rcv_saddr;
daddr = sk->sk_daddr;
} else {
const struct iphdr *iph = ip_hdr(skb);
saddr = iph->saddr;
daddr = iph->daddr;
}
hp = tcp_get_md5sig_pool();
if (!hp)
goto clear_hash_noput;
req = hp->md5_req;
if (crypto_ahash_init(req))
goto clear_hash;
if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
goto clear_hash;
if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
ahash_request_set_crypt(req, NULL, md5_hash, 0);
if (crypto_ahash_final(req))
goto clear_hash;
tcp_put_md5sig_pool();
return 0;
clear_hash:
tcp_put_md5sig_pool();
clear_hash_noput:
memset(md5_hash, 0, 16);
return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
#endif
/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
/*
* This gets called for each TCP segment that arrives
* so we want to be efficient.
* We have 3 drop cases:
* o No MD5 hash and one expected.
* o MD5 hash and we're not expecting one.
* o MD5 hash and it's wrong.
*/
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
const struct iphdr *iph = ip_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
int genhash;
unsigned char newhash[16];
hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
AF_INET);
hash_location = tcp_parse_md5sig_option(th);
/* We've parsed the options - do we have a hash? */
if (!hash_expected && !hash_location)
return false;
if (hash_expected && !hash_location) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
return true;
}
if (!hash_expected && hash_location) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
return true;
}
/* Okay, so we have both hash_expected and hash_location -
* we need to calculate the MD5 hash and compare.
*/
genhash = tcp_v4_md5_hash_skb(newhash,
hash_expected,
NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
&iph->saddr, ntohs(th->source),
&iph->daddr, ntohs(th->dest),
genhash ? " tcp_v4_calc_md5_hash failed"
: "");
return true;
}
return false;
#endif
return false;
}
static void tcp_v4_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct net *net = sock_net(sk_listener);
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
}
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct flowi *fl,
const struct request_sock *req)
{
return inet_csk_route_req(sk, &fl->u.ip4, req);
}
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.family = PF_INET,
.obj_size = sizeof(struct tcp_request_sock),
.rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v4_reqsk_send_ack,
.destructor = tcp_v4_reqsk_destructor,
.send_reset = tcp_v4_send_reset,
.syn_ack_timeout = tcp_syn_ack_timeout,
};
const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.mss_clamp = TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
.req_md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
#endif
.init_req = tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v4_init_sequence,
#endif
.route_req = tcp_v4_route_req,
.init_seq = tcp_v4_init_seq,
.init_ts_off = tcp_v4_init_ts_off,
.send_synack = tcp_v4_send_synack,
};
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
/* Never answer SYNs sent to broadcast or multicast addresses */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;
return tcp_conn_request(&tcp_request_sock_ops,
&tcp_request_sock_ipv4_ops, sk, skb);
drop:
tcp_listendrop(sk);
return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);
/*
* The three way handshake has completed - we got a valid synack -
* now create the new socket.
*/
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst,
struct request_sock *req_unhash,
bool *own_req)
{
struct inet_request_sock *ireq;
bool found_dup_sk = false;
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
struct ip_options_rcu *inet_opt;
if (sk_acceptq_is_full(sk))
goto exit_overflow;
newsk = tcp_create_openreq_child(sk, req, skb);
if (!newsk)
goto exit_nonewsk;
newsk->sk_gso_type = SKB_GSO_TCPV4;
inet_sk_rx_dst_set(newsk, skb);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newsk->sk_bound_dev_if = ireq->ir_iif;
newinet->inet_saddr = ireq->ir_loc_addr;
inet_opt = rcu_dereference(ireq->ireq_opt);
RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = prandom_u32();
if (!dst) {
dst = inet_csk_route_child_sock(sk, newsk, req);
if (!dst)
goto put_and_exit;
} else {
/* syncookie case : see end of cookie_v4_check() */
}
sk_setup_caps(newsk, dst);
tcp_ca_openreq_child(newsk, dst);
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
tcp_initialize_rcv_mss(newsk);
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
AF_INET);
if (key) {
/*
* We're using one, so create a matching key
* on the newsk structure. If we fail to get
* memory, then we end up not copying the key
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
}
#endif
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
&found_dup_sk);
if (likely(*own_req)) {
tcp_move_syn(newtp, req);
ireq->ireq_opt = NULL;
} else {
newinet->inet_opt = NULL;
if (!req_unhash && found_dup_sk) {
/* This code path should only be executed in the
* syncookie case
*/
bh_unlock_sock(newsk);
sock_put(newsk);
newsk = NULL;
}
}
return newsk;
exit_overflow:
NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
dst_release(dst);
exit:
tcp_listendrop(sk);
return NULL;
put_and_exit:
newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
const struct tcphdr *th = tcp_hdr(skb);
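/* Only a non-SYN segment (the returning ACK of the handshake) can complete a syncookie exchange. */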
if (!th->syn)
sk = cookie_v4_check(sk, skb);
#endif
return sk;
}
u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
struct tcphdr *th, u32 *cookie)
{
u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
&tcp_request_sock_ipv4_ops, sk, th);
if (mss) {
*cookie = __cookie_v4_init_sequence(iph, th, &mss);
tcp_synq_overflow(sk);
}
#endif
return mss;
}
/* The socket must have its spinlock held when we get
* here, unless it is a TCP_LISTEN socket.
*
* We have a potential double-lock case here, so even when
* doing backlog processing we use the BH locking scheme.
* This is because we cannot sleep with the original spinlock
* held.
*/
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct sock *rsk;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst;
dst = rcu_dereference_protected(sk->sk_rx_dst,
lockdep_sock_is_held(sk));
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
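/* Revalidate the cached input route; drop it if the ingress device changed or the dst is no longer valid. */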
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
!dst->ops->check(dst, 0)) {
RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
dst_release(dst);
}
}
tcp_rcv_established(sk, skb);
return 0;
}
if (tcp_checksum_complete(skb))
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
struct sock *nsk = tcp_v4_cookie_check(sk, skb);
if (!nsk)
goto discard;
if (nsk != sk) {
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
}
return 0;
}
} else
sock_rps_save_rxhash(sk, skb);
if (tcp_rcv_state_process(sk, skb)) {
rsk = sk;
goto reset;
}
return 0;
reset:
tcp_v4_send_reset(rsk, skb);
discard:
kfree_skb(skb);
/* Be careful here. If this function gets more complicated and
* gcc suffers from register pressure on the x86, sk (in %ebx)
* might be destroyed here. This current version compiles correctly,
* but you have been warned.
*/
return 0;
csum_err:
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
int tcp_v4_early_demux(struct sk_buff *skb)
{
const struct iphdr *iph;
const struct tcphdr *th;
struct sock *sk;
if (skb->pkt_type != PACKET_HOST)
return 0;
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
return 0;
iph = ip_hdr(skb);
th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr) / 4)
return 0;
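/* Look up an established socket so its cached rx dst can be attached to the skb early. */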
sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
iph->saddr, th->source,
iph->daddr, ntohs(th->dest),
skb->skb_iif, inet_sdif(skb));
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
if (sk_fullsock(sk)) {
struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, 0);
if (dst &&
inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
return 0;
}
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
u32 tail_gso_size, tail_gso_segs;
struct skb_shared_info *shinfo;
const struct tcphdr *th;
struct tcphdr *thtail;
struct sk_buff *tail;
unsigned int hdrlen;
bool fragstolen;
u32 gso_segs;
u32 gso_size;
int delta;
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
* we can fix skb->truesize to its real value to avoid future drops.
* This is valid because skb is not yet charged to the socket.
* It has been noticed that pure SACK packets were sometimes dropped
* (if cooked by drivers without the copybreak feature).
*/
skb_condense(skb);
skb_dst_drop(skb);
if (unlikely(tcp_checksum_complete(skb))) {
bh_unlock_sock(sk);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
return true;
}
/* Attempt coalescing to last skb in backlog, even if we are
* above the limits.
* This is okay because skb capacity is limited to MAX_SKB_FRAGS.
*/
th = (const struct tcphdr *)skb->data;
hdrlen = th->doff * 4;
tail = sk->sk_backlog.tail;
if (!tail)
goto no_coalesce;
thtail = (struct tcphdr *)tail->data;
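/* Coalescing is only safe when the new segment directly follows the
* backlog tail and both share the same DS field, header length and
* option bytes, with ACK set on both, no SYN/RST/URG, and matching
* ECE/CWR bits; otherwise fall back to the regular backlog path.
*/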
if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
((TCP_SKB_CB(tail)->tcp_flags |
TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
!((TCP_SKB_CB(tail)->tcp_flags &
TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
((TCP_SKB_CB(tail)->tcp_flags ^
TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
#ifdef CONFIG_TLS_DEVICE
tail->decrypted != skb->decrypted ||
#endif
thtail->doff != th->doff ||
memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
goto no_coalesce;
__skb_pull(skb, hdrlen);
shinfo = skb_shinfo(skb);
gso_size = shinfo->gso_size ?: skb->len;
gso_segs = shinfo->gso_segs ?: 1;
shinfo = skb_shinfo(tail);
tail_gso_size = shinfo->gso_size ?: (tail->len - hdrlen);
tail_gso_segs = shinfo->gso_segs ?: 1;
if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
if (likely(!before(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))) {
TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
thtail->window = th->window;
}
/* We have to update both TCP_SKB_CB(tail)->tcp_flags and
* thtail->fin, so that the fast path in tcp_rcv_established()
* is not entered if we append a packet with a FIN.
* SYN, RST, URG are not present.
* ACK is set on both packets.
* PSH : we do not really care in TCP stack,
* at least for 'GRO' packets.
*/
thtail->fin |= th->fin;
TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
if (TCP_SKB_CB(skb)->has_rxtstamp) {
TCP_SKB_CB(tail)->has_rxtstamp = true;
tail->tstamp = skb->tstamp;
skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
}
/* Not as strict as GRO. We only need to carry mss max value */
shinfo->gso_size = max(gso_size, tail_gso_size);
shinfo->gso_segs = min_t(u32, gso_segs + tail_gso_segs, 0xFFFF);
sk->sk_backlog.len += delta;
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPBACKLOGCOALESCE);
kfree_skb_partial(skb, fragstolen);
return false;
}
__skb_push(skb, hdrlen);
no_coalesce:
/* Only the socket owner can try to collapse/prune rx queues
* to reduce memory overhead, so add a little headroom here.
* Few socket backlogs are likely to be non-empty concurrently.
*/
limit += 64*1024;
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
return true;
}
return false;
}
EXPORT_SYMBOL(tcp_add_backlog);
int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
struct tcphdr *th = (struct tcphdr *)skb->data;
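/* Run the socket's filter, but never trim the skb below the TCP header itself. */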
return sk_filter_trim_cap(sk, skb, th->doff * 4);
}
EXPORT_SYMBOL(tcp_filter);
static void tcp_v4_restore_cb(struct sk_buff *skb)
{
memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
sizeof(struct inet_skb_parm));
}
static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
const struct tcphdr *th)
{
/* This is tricky: we move IPCB to its correct location inside TCP_SKB_CB().
* barrier() makes sure the compiler won't play fool^Waliasing games.
*/
memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
sizeof(struct inet_skb_parm));
barrier();
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
/*
* From tcp_input.c
*/
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
struct sk_buff *skb_to_free;
int sdif = inet_sdif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
struct sock *sk;
int ret;
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
/* Count it even if it's bad */
__TCP_INC_STATS(net, TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
th = (const struct tcphdr *)skb->data;
if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
goto bad_packet;
if (!pskb_may_pull(skb, th->doff * 4))
goto discard_it;
/* An explanation is required here, I think.
* Packet length and doff are validated by header prediction,
* provided the case of th->doff == 0 is eliminated.
* So, we defer the checks. */
if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
goto csum_error;
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
lookup:
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
th->dest, sdif, &refcounted);
if (!sk)
goto no_tcp_socket;
process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
}
if (tcp_checksum_complete(skb)) {
reqsk_put(req);
goto csum_error;
}
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
/* We own a reference on the listener, increase it again
* as we might lose it too soon.
*/
sock_hold(sk);
refcounted = true;
nsk = NULL;
if (!tcp_filter(sk, skb)) {
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
if (req_stolen) {
/* Another cpu got exclusive access to req
* and created a full blown socket.
* Try to feed this packet to this socket
* instead of discarding it.
*/
tcp_v4_restore_cb(skb);
sock_put(sk);
goto lookup;
}
goto discard_and_relse;
}
if (nsk == sk) {
reqsk_put(req);
tcp_v4_restore_cb(skb);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
goto discard_and_relse;
} else {
sock_put(sk);
return 0;
}
}
if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
}
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
if (tcp_v4_inbound_md5_hash(sk, skb))
goto discard_and_relse;
nf_reset_ct(skb);
if (tcp_filter(sk, skb))
goto discard_and_relse;
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
skb->dev = NULL;
if (sk->sk_state == TCP_LISTEN) {
ret = tcp_v4_do_rcv(sk, skb);
goto put_and_return;
}
sk_incoming_cpu_update(sk);
bh_lock_sock_nested(sk);
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
skb_to_free = sk->sk_rx_skb_cache;
sk->sk_rx_skb_cache = NULL;
ret = tcp_v4_do_rcv(sk, skb);
} else {
if (tcp_add_backlog(sk, skb))
goto discard_and_relse;
skb_to_free = NULL;
}
bh_unlock_sock(sk);
if (skb_to_free)
__kfree_skb(skb_to_free);
put_and_return:
if (refcounted)
sock_put(sk);
return ret;
no_tcp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
tcp_v4_fill_cb(skb, iph, th);
if (tcp_checksum_complete(skb)) {
csum_error:
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
tcp_v4_send_reset(NULL, skb);
}
discard_it:
/* Discard frame. */
kfree_skb(skb);
return 0;
discard_and_relse:
sk_drops_add(sk, skb);
if (refcounted)
sock_put(sk);
goto discard_it;
do_time_wait:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
tcp_v4_fill_cb(skb, iph, th);
if (tcp_checksum_complete(skb)) {
inet_twsk_put(inet_twsk(sk));
goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
&tcp_hashinfo, skb,
__tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
inet_iif(skb),
sdif);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
goto process;
}
}
/* to ACK */
/* fall through */
case TCP_TW_ACK:
tcp_v4_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
tcp_v4_send_reset(sk, skb);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:;
}
goto discard_it;
}
static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp_timewait_sock),
.twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
};
void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
if (dst && dst_hold_safe(dst)) {
rcu_assign_pointer(sk->sk_rx_dst, dst);
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);
const struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
.sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v4_conn_request,
.syn_recv_sock = tcp_v4_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
.mtu_reduced = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_parse = tcp_v4_parse_md5_keys,
};
#endif
/* NOTE: A lot of things are set to zero explicitly by the call to
* sk_alloc(), so they need not be done here.
*/
static int tcp_v4_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_init_sock(sk);
icsk->icsk_af_ops = &ipv4_specific;
#ifdef CONFIG_TCP_MD5SIG
tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif
return 0;
}
void tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
trace_tcp_destroy_sock(sk);
tcp_clear_xmit_timers(sk);
tcp_cleanup_congestion_control(sk);
tcp_cleanup_ulp(sk);
/* Clean up the write buffer. */
tcp_write_queue_purge(sk);
/* Check if we want to disable active TFO */
tcp_fastopen_active_disable_ofo_check(sk);
/* Cleans up our, hopefully empty, out_of_order_queue. */
skb_rbtree_purge(&tp->out_of_order_queue);
#ifdef CONFIG_TCP_MD5SIG
/* Clean up the MD5 key list, if any */
if (tp->md5sig_info) {
tcp_clear_md5_list(sk);
kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
tp->md5sig_info = NULL;
}
#endif
/* Clean up a referenced TCP bind bucket. */
if (inet_csk(sk)->icsk_bind_hash)
inet_put_port(sk);
BUG_ON(rcu_access_pointer(tp->fastopen_rsk));
/* If the socket was aborted during the connect operation */
tcp_free_fastopen_req(tp);
tcp_fastopen_destroy_cipher(sk);
tcp_saved_syn_free(tp);
sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
/*
* Get the next listener socket following cur. If cur is NULL, get the first
* socket starting from the bucket given in st->bucket; when st->bucket is zero
* the very first socket in the hash table is returned.
*/
static void *listening_get_next(struct seq_file *seq, void *cur)
{
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
struct inet_listen_hashbucket *ilb;
struct hlist_nulls_node *node;
struct sock *sk = cur;
if (!sk) {
get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket];
spin_lock(&ilb->lock);
sk = sk_nulls_head(&ilb->nulls_head);
st->offset = 0;
goto get_sk;
}
ilb = &tcp_hashinfo.listening_hash[st->bucket];
++st->num;
++st->offset;
sk = sk_nulls_next(sk);
get_sk:
sk_nulls_for_each_from(sk, node) {
if (!net_eq(sock_net(sk), net))
continue;
if (sk->sk_family == afinfo->family)
return sk;
}
spin_unlock(&ilb->lock);
st->offset = 0;
if (++st->bucket < INET_LHTABLE_SIZE)
goto get_head;
return NULL;
}
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
struct tcp_iter_state *st = seq->private;
void *rc;
st->bucket = 0;
st->offset = 0;
rc = listening_get_next(seq, NULL);
while (rc && *pos) {
rc = listening_get_next(seq, rc);
--*pos;
}
return rc;
}
static inline bool empty_bucket(const struct tcp_iter_state *st)
{
return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}
/*
* Get first established socket starting from bucket given in st->bucket.
* If st->bucket is zero, the very first socket in the hash is returned.
*/
static void *established_get_first(struct seq_file *seq)
{
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
void *rc = NULL;
st->offset = 0;
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
struct sock *sk;
struct hlist_nulls_node *node;
spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
/* Lockless fast path for the common case of empty buckets */
if (empty_bucket(st))
continue;
spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
if (sk->sk_family != afinfo->family ||
!net_eq(sock_net(sk), net)) {
continue;
}
rc = sk;
goto out;
}
spin_unlock_bh(lock);
}
out:
return rc;
}
static void *established_get_next(struct seq_file *seq, void *cur)
{
struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
struct sock *sk = cur;
struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
++st->num;
++st->offset;
sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) {
if (sk->sk_family == afinfo->family &&
net_eq(sock_net(sk), net))
return sk;
}
spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
++st->bucket;
return established_get_first(seq);
}
static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
struct tcp_iter_state *st = seq->private;
void *rc;
st->bucket = 0;
rc = established_get_first(seq);
while (rc && pos) {
rc = established_get_next(seq, rc);
--pos;
}
return rc;
}
static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
void *rc;
struct tcp_iter_state *st = seq->private;
st->state = TCP_SEQ_STATE_LISTENING;
rc = listening_get_idx(seq, &pos);
if (!rc) {
st->state = TCP_SEQ_STATE_ESTABLISHED;
rc = established_get_idx(seq, pos);
}
return rc;
}
static void *tcp_seek_last_pos(struct seq_file *seq)
{
struct tcp_iter_state *st = seq->private;
int bucket = st->bucket;
int offset = st->offset;
int orig_num = st->num;
void *rc = NULL;
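/* Resume the walk at the bucket and offset remembered from the previous read instead of rescanning from the beginning. */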
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
if (st->bucket >= INET_LHTABLE_SIZE)
break;
st->state = TCP_SEQ_STATE_LISTENING;
rc = listening_get_next(seq, NULL);
while (offset-- && rc && bucket == st->bucket)
rc = listening_get_next(seq, rc);
if (rc)
break;
st->bucket = 0;
st->state = TCP_SEQ_STATE_ESTABLISHED;
/* Fallthrough */
case TCP_SEQ_STATE_ESTABLISHED:
if (st->bucket > tcp_hashinfo.ehash_mask)
break;
rc = established_get_first(seq);
while (offset-- && rc && bucket == st->bucket)
rc = established_get_next(seq, rc);
}
st->num = orig_num;
return rc;
}
void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
struct tcp_iter_state *st = seq->private;
void *rc;
if (*pos && *pos == st->last_pos) {
rc = tcp_seek_last_pos(seq);
if (rc)
goto out;
}
st->state = TCP_SEQ_STATE_LISTENING;
st->num = 0;
st->bucket = 0;
st->offset = 0;
rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
out:
st->last_pos = *pos;
return rc;
}
EXPORT_SYMBOL(tcp_seq_start);
void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct tcp_iter_state *st = seq->private;
void *rc = NULL;
if (v == SEQ_START_TOKEN) {
rc = tcp_get_idx(seq, 0);
goto out;
}
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
rc = listening_get_next(seq, v);
if (!rc) {
st->state = TCP_SEQ_STATE_ESTABLISHED;
st->bucket = 0;
st->offset = 0;
rc = established_get_first(seq);
}
break;
case TCP_SEQ_STATE_ESTABLISHED:
rc = established_get_next(seq, v);
break;
}
out:
++*pos;
st->last_pos = *pos;
return rc;
}
EXPORT_SYMBOL(tcp_seq_next);
void tcp_seq_stop(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st = seq->private;
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
break;
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
break;
}
}
EXPORT_SYMBOL(tcp_seq_stop);
static void get_openreq4(const struct request_sock *req,
struct seq_file *f, int i)
{
const struct inet_request_sock *ireq = inet_rsk(req);
long delta = req->rsk_timer.expires - jiffies;
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
i,
ireq->ir_loc_addr,
ireq->ir_num,
ireq->ir_rmt_addr,
ntohs(ireq->ir_rmt_port),
TCP_SYN_RECV,
0, 0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
jiffies_delta_to_clock_t(delta),
req->num_timeout,
from_kuid_munged(seq_user_ns(f),
sock_i_uid(req->rsk_listener)),
0, /* non standard timer */
0, /* open_requests have no inode */
0,
req);
}
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
int timer_active;
unsigned long timer_expires;
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct inet_sock *inet = inet_sk(sk);
const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
__be32 dest = inet->inet_daddr;
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
__u16 srcp = ntohs(inet->inet_sport);
__u8 seq_state = sk->sk_state;
int rx_queue;
int state;
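/* timer_active codes reported in /proc/net/tcp: 1 retransmit/loss-probe,
* 4 zero-window probe, 2 keepalive, 0 none (3 is used for timewait sockets).
*/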
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
timer_expires = icsk->icsk_timeout;
} else if (timer_pending(&sk->sk_timer)) {
timer_active = 2;
timer_expires = sk->sk_timer.expires;
} else {
timer_active = 0;
timer_expires = jiffies;
}
state = inet_sk_state_load(sk);
if (inet->transparent)
seq_state |= 0x80;
if (state == TCP_LISTEN)
rx_queue = sk->sk_ack_backlog;
else
/* Because we don't lock the socket,
* we might find a transient negative value.
*/
rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
READ_ONCE(tp->copied_seq), 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
i, src, srcp, dest, destp, seq_state,
READ_ONCE(tp->write_seq) - tp->snd_una,
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
icsk->icsk_probes_out,
sock_i_ino(sk),
refcount_read(&sk->sk_refcnt), sk,
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
tp->snd_cwnd,
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
struct seq_file *f, int i)
{
long delta = tw->tw_timer.expires - jiffies;
__be32 dest, src;
__u16 destp, srcp;
dest = tw->tw_daddr;
src = tw->tw_rcv_saddr;
destp = ntohs(tw->tw_dport);
srcp = ntohs(tw->tw_sport);
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
refcount_read(&tw->tw_refcnt), tw);
}
#define TMPSZ 150
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
struct sock *sk = v;
seq_setwidth(seq, TMPSZ - 1);
if (v == SEQ_START_TOKEN) {
seq_puts(seq, " sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
"inode");
goto out;
}
st = seq->private;
if (sk->sk_state == TCP_TIME_WAIT)
get_timewait4_sock(v, seq, st->num);
else if (sk->sk_state == TCP_NEW_SYN_RECV)
get_openreq4(v, seq, st->num);
else
get_tcp4_sock(v, seq, st->num);
out:
seq_pad(seq, '\n');
return 0;
}
static const struct seq_operations tcp4_seq_ops = {
.show = tcp4_seq_show,
.start = tcp_seq_start,
.next = tcp_seq_next,
.stop = tcp_seq_stop,
};
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
.family = AF_INET,
};
static int __net_init tcp4_proc_init_net(struct net *net)
{
if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
return -ENOMEM;
return 0;
}
static void __net_exit tcp4_proc_exit_net(struct net *net)
{
remove_proc_entry("tcp", net->proc_net);
}
static struct pernet_operations tcp4_net_ops = {
.init = tcp4_proc_init_net,
.exit = tcp4_proc_exit_net,
};
int __init tcp4_proc_init(void)
{
return register_pernet_subsys(&tcp4_net_ops);
}
void tcp4_proc_exit(void)
{
unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
.pre_connect = tcp_v4_pre_connect,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
.ioctl = tcp_ioctl,
.init = tcp_v4_init_sock,
.destroy = tcp_v4_destroy_sock,
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
.release_cb = tcp_release_cb,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
.leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
.no_autobind = true,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);
static void __net_exit tcp_sk_exit(struct net *net)
{
int cpu;
if (net->ipv4.tcp_congestion_control)
module_put(net->ipv4.tcp_congestion_control->owner);
for_each_possible_cpu(cpu)
inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
free_percpu(net->ipv4.tcp_sk);
}
static int __net_init tcp_sk_init(struct net *net)
{
int res, cpu, cnt;
net->ipv4.tcp_sk = alloc_percpu(struct sock *);
if (!net->ipv4.tcp_sk)
return -ENOMEM;
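/* Create one control socket per possible CPU; these are used to send RSTs and ACKs outside of full socket context. */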
for_each_possible_cpu(cpu) {
struct sock *sk;
res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
IPPROTO_TCP, net);
if (res)
goto fail;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
/* Please enforce IP_DF and IPID==0 for RST and
* ACK sent in SYN-RECV and TIME-WAIT state.
*/
inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
}
net->ipv4.sysctl_tcp_ecn = 2;
net->ipv4.sysctl_tcp_ecn_fallback = 1;
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
net->ipv4.sysctl_tcp_syncookies = 1;
net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
net->ipv4.sysctl_tcp_orphan_retries = 0;
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
net->ipv4.sysctl_tcp_tw_reuse = 2;
cnt = tcp_hashinfo.ehash_mask + 1;
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
net->ipv4.sysctl_tcp_early_retrans = 3;
net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
net->ipv4.sysctl_tcp_retrans_collapse = 1;
net->ipv4.sysctl_tcp_max_reordering = 300;
net->ipv4.sysctl_tcp_dsack = 1;
net->ipv4.sysctl_tcp_app_win = 31;
net->ipv4.sysctl_tcp_adv_win_scale = 1;
net->ipv4.sysctl_tcp_frto = 2;
net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
/* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames
* which are too large can cause TCP streams to be bursty.
*/
net->ipv4.sysctl_tcp_tso_win_divisor = 3;
/* Default TSQ limit of 16 TSO segments */
net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
/* RFC 5961 challenge ACK rate limiting */
net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
net->ipv4.sysctl_tcp_min_tso_segs = 2;
net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
net->ipv4.sysctl_tcp_autocorking = 1;
net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
if (net != &init_net) {
memcpy(net->ipv4.sysctl_tcp_rmem,
init_net.ipv4.sysctl_tcp_rmem,
sizeof(init_net.ipv4.sysctl_tcp_rmem));
memcpy(net->ipv4.sysctl_tcp_wmem,
init_net.ipv4.sysctl_tcp_wmem,
sizeof(init_net.ipv4.sysctl_tcp_wmem));
}
net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
/* Reno is always built in */
if (!net_eq(net, &init_net) &&
try_module_get(init_net.ipv4.tcp_congestion_control->owner))
net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
else
net->ipv4.tcp_congestion_control = &tcp_reno;
return 0;
fail:
tcp_sk_exit(net);
return res;
}
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
inet_twsk_purge(&tcp_hashinfo, AF_INET);
list_for_each_entry(net, net_exit_list, exit_list)
tcp_fastopen_ctx_destroy(net);
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
.init = tcp_sk_init,
.exit = tcp_sk_exit,
.exit_batch = tcp_sk_exit_batch,
};
void __init tcp_v4_init(void)
{
if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
}