Merge tag 'ASB-2024-08-05_12-5.10' of https://android.googlesource.com/kernel/common into android13-5.10-waipio

https://source.android.com/docs/security/bulletin/2024-08-01
CVE-2024-36971

* tag 'ASB-2024-08-05_12-5.10' of https://android.googlesource.com/kernel/common:
  ANDROID: GKI: remove export of tracing control functions
  ANDROID: Incremental fs: Retry page faults on non-fatal errors
  ANDROID: update ABI XML due to struct clk_core change
  UPSTREAM: usb: gadget: configfs: Prevent OOB read/write in usb_string_copy()
  UPSTREAM: f2fs: avoid false alarm of circular locking
  UPSTREAM: f2fs: fix deadlock in i_xattr_sem and inode page lock
  ANDROID: userfaultfd: Fix use-after-free in userfaultfd_using_sigbus()
  ANDROID: 16K: Don't set padding vm_flags on 32-bit archs
  ANDROID: update .xml file due to struct clk_core abi change
  ANDROID: mark DRM_VMWGFX as BROKEN
  Revert "ANDROID: Setting up GS before calling __restore_processor_state."
  Revert "block: introduce zone_write_granularity limit"
  Revert "block: Clear zone limits for a non-zoned stacked queue"
  Revert "scsi: sd: Fix wrong zone_write_granularity value during revalidate"
  Revert "PCI/ERR: Cache RCEC EA Capability offset in pci_init_capabilities()"
  Revert "PCI: Cache PCIe Device Capabilities register"
  Revert "PCI: Work around Intel I210 ROM BAR overlap defect"
  Revert "PCI/ASPM: Make Intel DG2 L1 acceptable latency unlimited"
  Revert "PCI/DPC: Quirk PIO log size for certain Intel Root Ports"
  Revert "PCI/DPC: Quirk PIO log size for Intel Ice Lake Root Ports"
  Revert "PCI/DPC: Quirk PIO log size for Intel Raptor Lake Root Ports"
  Revert "timers: Rename del_timer_sync() to timer_delete_sync()"
  Linux 5.10.218
  docs: kernel_include.py: Cope with docutils 0.21
  serial: kgdboc: Fix NMI-safety problems from keyboard reset code
  usb: typec: ucsi: displayport: Fix potential deadlock
  drm/amdgpu: Fix possible NULL dereference in amdgpu_ras_query_error_status_helper()
  btrfs: add missing mutex_unlock in btrfs_relocate_sys_chunks()
  mptcp: ensure snd_nxt is properly initialized on connect
  firmware: arm_scmi: Harden accesses to the reset domains
  KVM: x86: Clear "has_error_code", not "error_code", for RM exception injection
  netlink: annotate lockless accesses to nlk->max_recvmsg_len
  ima: fix deadlock when traversing "ima_default_rules".
  net: bcmgenet: synchronize UMAC_CMD access
  net: bcmgenet: synchronize EXT_RGMII_OOB_CTRL access
  Revert "selftests: mm: fix map_hugetlb failure on 64K page size systems"
  x86/xen: Drop USERGS_SYSRET64 paravirt call
  pinctrl: core: handle radix_tree_insert() errors in pinctrl_register_one_pin()
  Linux 5.10.217
  md: fix kmemleak of rdev->serial
  keys: Fix overwrite of key expiration on instantiation
  regulator: core: fix debugfs creation regression
  hwmon: (pmbus/ucd9000) Increase delay from 250 to 500us
  net: fix out-of-bounds access in ops_init
  drm/vmwgfx: Fix invalid reads in fence signaled events
  mei: me: add lunar lake point M DID
  dyndbg: fix old BUG_ON in >control parser
  ASoC: tegra: Fix DSPK 16-bit playback
  net: bcmgenet: synchronize use of bcmgenet_set_rx_mode()
  tipc: fix UAF in error path
  iio: accel: mxc4005: Interrupt handling fixes
  iio:imu: adis16475: Fix sync mode setting
  ALSA: hda/realtek: Fix mute led of HP Laptop 15-da3001TU
  usb: dwc3: core: Prevent phy suspend during init
  usb: xhci-plat: Don't include xhci.h
  usb: gadget: f_fs: Fix a race condition when processing setup packets.
  usb: gadget: composite: fix OS descriptors w_value logic
  usb: ohci: Prevent missed ohci interrupts
  usb: Fix regression caused by invalid ep0 maxpacket in virtual SuperSpeed device
  usb: typec: ucsi: Fix connector check on init
  usb: typec: ucsi: Check for notifications after init
  arm64: dts: qcom: Fix 'interrupt-map' parent address cells
  firewire: nosy: ensure user_length is taken into account when fetching packet contents
  btrfs: fix kvcalloc() arguments order in btrfs_ioctl_send()
  net: hns3: use appropriate barrier function after setting a bit value
  ipv6: fib6_rules: avoid possible NULL dereference in fib6_rule_action()
  net: bridge: fix corrupted ethernet header on multicast-to-unicast
  kcov: Remove kcov include from sched.h and move it to its users.
  phonet: fix rtm_phonet_notify() skb allocation
  hwmon: (corsair-cpro) Protect ccp->wait_input_report with a spinlock
  hwmon: (corsair-cpro) Use complete_all() instead of complete() in ccp_raw_event()
  hwmon: (corsair-cpro) Use a separate buffer for sending commands
  rtnetlink: Correct nested IFLA_VF_VLAN_LIST attribute validation
  Bluetooth: l2cap: fix null-ptr-deref in l2cap_chan_timeout
  Bluetooth: Fix use-after-free bugs caused by sco_sock_timeout
  tcp: Use refcount_inc_not_zero() in tcp_twsk_unique().
  tcp: defer shutdown(SEND_SHUTDOWN) for TCP_SYN_RECV sockets
  xfrm: Preserve vlan tags for transport mode software GRO
  net:usb:qmi_wwan: support Rolling modules
  drm/nouveau/dp: Don't probe eDP ports twice harder
  fs/9p: drop inodes immediately on non-.L too
  clk: Don't hold prepare_lock when calling kref_put()
  gpio: crystalcove: Use -ENOTSUPP consistently
  gpio: wcove: Use -ENOTSUPP consistently
  9p: explicitly deny setlease attempts
  fs/9p: translate O_TRUNC into OTRUNC
  fs/9p: only translate RWX permissions for plain 9P2000
  selftests: timers: Fix valid-adjtimex signed left-shift undefined behavior
  MIPS: scall: Save thread_info.syscall unconditionally on entry
  gpu: host1x: Do not setup DMA for virtual devices
  blk-iocost: avoid out of bounds shift
  scsi: target: Fix SELinux error when systemd-modules loads the target module
  btrfs: always clear PERTRANS metadata during commit
  btrfs: make btrfs_clear_delalloc_extent() free delalloc reserve
  tools/power turbostat: Fix Bzy_MHz documentation typo
  tools/power turbostat: Fix added raw MSR output
  firewire: ohci: mask bus reset interrupts between ISR and bottom half
  ata: sata_gemini: Check clk_enable() result
  net: bcmgenet: Reset RBUF on first open
  ALSA: line6: Zero-initialize message buffers
  btrfs: return accurate error code on open failure in open_fs_devices()
  scsi: bnx2fc: Remove spin_lock_bh while releasing resources after upload
  net: mark racy access on sk->sk_rcvbuf
  wifi: cfg80211: fix rdev_dump_mpp() arguments order
  wifi: mac80211: fix ieee80211_bss_*_flags kernel-doc
  gfs2: Fix invalid metadata access in punch_hole
  scsi: lpfc: Update lpfc_ramp_down_queue_handler() logic
  KVM: arm64: vgic-v2: Check for non-NULL vCPU in vgic_v2_parse_attr()
  KVM: arm64: vgic-v2: Use cpuid from userspace as vcpu_id
  clk: sunxi-ng: h6: Reparent CPUX during PLL CPUX rate change
  net: gro: add flush check in udp_gro_receive_segment
  tipc: fix a possible memleak in tipc_buf_append
  net: core: reject skb_copy(_expand) for fraglist GSO skbs
  net: bridge: fix multicast-to-unicast with fraglist GSO
  net: dsa: mv88e6xxx: Fix number of databases for 88E6141 / 88E6341
  cxgb4: Properly lock TX queue for the selftest.
  ASoC: meson: cards: select SND_DYNAMIC_MINORS
  ASoC: Fix 7/8 spaces indentation in Kconfig
  net: qede: use return from qede_parse_actions()
  net: qede: use return from qede_parse_flow_attr() for flow_spec
  net: qede: use return from qede_parse_flow_attr() for flower
  net: qede: sanitize 'rc' in qede_add_tc_flower_fltr()
  s390/vdso: Add CFI for RA register to asm macro vdso_func
  net l2tp: drop flow hash on forward
  nsh: Restore skb->{protocol,data,mac_header} for outer header in nsh_gso_segment().
  octeontx2-af: avoid off-by-one read from userspace
  bna: ensure the copied buf is NUL terminated
  s390/mm: Fix clearing storage keys for huge pages
  s390/mm: Fix storage key clearing for guest huge pages
  regulator: mt6360: De-capitalize devicetree regulator subnodes
  pinctrl: devicetree: fix refcount leak in pinctrl_dt_to_map()
  power: rt9455: hide unused rt9455_boost_voltage_values
  nfs: Handle error of rpc_proc_register() in nfs_net_init().
  nfs: make the rpc_stat per net namespace
  nfs: expose /proc/net/sunrpc/nfs in net namespaces
  sunrpc: add a struct rpc_stats arg to rpc_create_args
  pinctrl: mediatek: paris: Rework support for PIN_CONFIG_{INPUT,OUTPUT}_ENABLE
  pinctrl: mediatek: paris: Fix PIN_CONFIG_INPUT_SCHMITT_ENABLE readback
  pinctrl: mediatek: paris: Rework mtk_pinconf_{get,set} switch/case logic
  pinctrl: core: delete incorrect free in pinctrl_enable()
  pinctrl/meson: fix typo in PDM's pin name
  pinctrl: pinctrl-aspeed-g6: Fix register offset for pinconf of GPIOR-T
  eeprom: at24: fix memory corruption race condition
  eeprom: at24: Probe for DDR3 thermal sensor in the SPD case
  eeprom: at24: Use dev_err_probe for nvmem register failure
  wifi: nl80211: don't free NULL coalescing rule
  dmaengine: Revert "dmaengine: pl330: issue_pending waits until WFP state"
  dmaengine: pl330: issue_pending waits until WFP state
  Linux 5.10.216
  riscv: Disable STACKPROTECTOR_PER_TASK if GCC_PLUGIN_RANDSTRUCT is enabled
  serial: core: fix kernel-doc for uart_port_unlock_irqrestore()
  udp: preserve the connected status if only UDP cmsg
  bounds: Use the right number of bits for power-of-two CONFIG_NR_CPUS
  HID: i2c-hid: remove I2C_HID_READ_PENDING flag to prevent lock-up
  i2c: smbus: fix NULL function pointer dereference
  riscv: Fix TASK_SIZE on 64-bit NOMMU
  riscv: fix VMALLOC_START definition
  dma: xilinx_dpdma: Fix locking
  idma64: Don't try to serve interrupts when device is powered off
  dmaengine: owl: fix register access functions
  tcp: Fix NEW_SYN_RECV handling in inet_twsk_purge()
  tcp: Clean up kernel listener's reqsk in inet_twsk_purge()
  mtd: diskonchip: work around ubsan link failure
  stackdepot: respect __GFP_NOLOCKDEP allocation flag
  net: b44: set pause params only when interface is up
  ethernet: Add helper for assigning packet type when dest address does not match device address
  irqchip/gic-v3-its: Prevent double free on error
  drm/amdgpu: Fix leak when GPU memory allocation fails
  drm/amdgpu/sdma5.2: use legacy HDP flush for SDMA2/3
  arm64: dts: rockchip: enable internal pull-up for Q7_THRM# on RK3399 Puma
  cpu: Re-enable CPU mitigations by default for !X86 architectures
  btrfs: fix information leak in btrfs_ioctl_logical_to_ino()
  Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x0bda:0x4853
  Bluetooth: Fix type of len in {l2cap,sco}_sock_getsockopt_old()
  PM / devfreq: Fix buffer overflow in trans_stat_show
  tracing: Increase PERF_MAX_TRACE_SIZE to handle Sentinel1 and docker together
  tracing: Show size of requested perf buffer
  net/mlx5e: Fix a race in command alloc flow
  Revert "crypto: api - Disallow identical driver names"
  serial: mxs-auart: add spinlock around changing cts state
  serial: core: Provide port lock wrappers
  af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc().
  net: ethernet: ti: am65-cpts: Fix PTPv1 message type on TX packets
  iavf: Fix TC config comparison with existing adapter TC config
  i40e: Report MFS in decimal base instead of hex
  i40e: Do not use WQ_MEM_RECLAIM flag for workqueue
  netfilter: nf_tables: honor table dormant flag from netdev release event path
  mlxsw: spectrum_acl_tcam: Fix memory leak when canceling rehash work
  mlxsw: spectrum_acl_tcam: Fix incorrect list API usage
  mlxsw: spectrum_acl_tcam: Fix warning during rehash
  mlxsw: spectrum_acl_tcam: Fix memory leak during rehash
  mlxsw: spectrum_acl_tcam: Rate limit error message
  mlxsw: spectrum_acl_tcam: Fix possible use-after-free during rehash
  mlxsw: spectrum_acl_tcam: Fix possible use-after-free during activity update
  mlxsw: spectrum_acl_tcam: Fix race during rehash delayed work
  net: openvswitch: Fix Use-After-Free in ovs_ct_exit
  ipvs: Fix checksumming on GSO of SCTP packets
  net: gtp: Fix Use-After-Free in gtp_dellink
  net: usb: ax88179_178a: stop lying about skb->truesize
  ipv4: check for NULL idev in ip_route_use_hint()
  NFC: trf7970a: disable all regulators on removal
  mlxsw: core: Unregister EMAD trap using FORWARD action
  vxlan: drop packets from invalid src-address
  wifi: iwlwifi: mvm: remove old PASN station when adding a new one
  ARC: [plat-hsdk]: Remove misplaced interrupt-cells property
  arm64: dts: mediatek: mt2712: fix validation errors
  arm64: dts: mediatek: mt7622: drop "reset-names" from thermal block
  arm64: dts: mediatek: mt7622: fix ethernet controller "compatible"
  arm64: dts: mediatek: mt7622: fix IR nodename
  arm64: dts: mediatek: mt7622: fix clock controllers
  arm64: dts: mediatek: mt7622: introduce nodes for Wireless Ethernet Dispatch
  arm64: dts: mediatek: mt7622: add support for coherent DMA
  arm64: dts: rockchip: Remove unsupported node from the Pinebook Pro dts
  arm64: dts: rockchip: enable internal pull-up on PCIE_WAKE# for RK3399 Puma
  arm64: dts: rockchip: fix alphabetical ordering RK3399 puma
  nilfs2: fix OOB in nilfs_set_de_type
  nouveau: fix instmem race condition around ptr stores
  drm/amdgpu: validate the parameters of bo mapping operations more clearly
  init/main.c: Fix potential static_command_line memory overflow
  fs: sysfs: Fix reference leak in sysfs_break_active_protection()
  speakup: Avoid crash on very long word
  mei: me: disable RPL-S on SPS and IGN firmwares
  usb: Disable USB3 LPM at shutdown
  usb: dwc2: host: Fix dereference issue in DDMA completion flow.
  Revert "usb: cdc-wdm: close race between read and workqueue"
  USB: serial: option: add Telit FN920C04 rmnet compositions
  USB: serial: option: add Rolling RW101-GL and RW135-GL support
  USB: serial: option: support Quectel EM060K sub-models
  USB: serial: option: add Lonsung U8300/U9300 product
  USB: serial: option: add support for Fibocom FM650/FG650
  USB: serial: option: add Fibocom FM135-GL variants
  serial/pmac_zilog: Remove flawed mitigation for rx irq flood
  comedi: vmk80xx: fix incomplete endpoint checking
  thunderbolt: Fix wake configurations after device unplug
  thunderbolt: Avoid notify PM core about runtime PM resume
  binder: check offset alignment in binder_get_object()
  x86/cpufeatures: Fix dependencies for GFNI, VAES, and VPCLMULQDQ
  clk: Get runtime PM before walking tree during disable_unused
  clk: Initialize struct clk_core kref earlier
  clk: Print an info line before disabling unused clocks
  clk: remove extra empty line
  clk: Mark 'all_lists' as const
  clk: Remove prepare_lock hold assertion in __clk_release()
  drm/panel: visionox-rm69299: don't unregister DSI device
  drm: nv04: Fix out of bounds access
  RDMA/mlx5: Fix port number for counter query in multi-port configuration
  RDMA/cm: Print the old state when cm_destroy_id gets timeout
  RDMA/rxe: Fix the problem "mutex_destroy missing"
  tun: limit printing rate when illegal packet received by tun dev
  netfilter: nft_set_pipapo: do not free live element
  netfilter: nf_tables: Fix potential data-race in __nft_expr_type_get()
  Revert "tracing/trigger: Fix to return error if failed to alloc snapshot"
  kprobes: Fix possible use-after-free issue on kprobe registration
  selftests/ftrace: Limit length in subsystem-enable tests
  riscv: process: Fix kernel gp leakage
  riscv: Enable per-task stack canaries
  btrfs: record delayed inode root in transaction
  irqflags: Explicitly ignore lockdep_hrtimer_exit() argument
  x86/apic: Force native_apic_mem_read() to use the MOV instruction
  selftests: timers: Fix abs() warning in posix_timers test
  x86/cpu: Actually turn off mitigations by default for SPECULATION_MITIGATIONS=n
  vhost: Add smp_rmb() in vhost_vq_avail_empty()
  drm/client: Fully protect modes[] with dev->mode_config.mutex
  btrfs: qgroup: correctly model root qgroup rsv in convert
  mailbox: imx: fix suspend failue
  iommu/vt-d: Allocate local memory for page request queue
  net: ena: Fix incorrect descriptor free behavior
  net: ena: Wrong missing IO completions check order
  net: ena: Fix potential sign extension issue
  af_unix: Fix garbage collector racing against connect()
  af_unix: Do not use atomic ops for unix_sk(sk)->inflight.
  net/mlx5: Properly link new fs rules into the tree
  netfilter: complete validation of user input
  Bluetooth: SCO: Fix not validating setsockopt user input
  ipv6: fix race condition between ipv6_get_ifaddr and ipv6_del_addr
  ipv4/route: avoid unused-but-set-variable warning
  ipv6: fib: hide unused 'pn' variable
  octeontx2-af: Fix NIX SQ mode and BP config
  geneve: fix header validation in geneve[6]_xmit_skb
  xsk: validate user input for XDP_{UMEM|COMPLETION}_FILL_RING
  u64_stats: fix u64_stats_init() for lockdep when used repeatedly in one file
  net: openvswitch: fix unwanted error log on timeout policy probing
  nouveau: fix function cast warning
  media: cec: core: remove length check of Timer Status
  Bluetooth: Fix memory leak in hci_req_sync_complete()
  batman-adv: Avoid infinite loop trying to resize local TT
  Linux 5.10.215
  x86/head/64: Re-enable stack protection
  x86/retpoline: Add NOENDBR annotation to the SRSO dummy return thunk
  scsi: sd: Fix wrong zone_write_granularity value during revalidate
  kbuild: dummy-tools: adjust to stricter stackprotector check
  VMCI: Fix possible memcpy() run-time warning in vmci_datagram_invoke_guest_handler()
  Bluetooth: btintel: Fixe build regression
  drm/i915/gt: Reset queue_priority_hint on parking
  x86/mm/pat: fix VM_PAT handling in COW mappings
  virtio: reenable config if freezing device failed
  tty: n_gsm: require CAP_NET_ADMIN to attach N_GSM0710 ldisc
  netfilter: nf_tables: discard table flag update with pending basechain deletion
  netfilter: nf_tables: release mutex after nft_gc_seq_end from abort path
  netfilter: nf_tables: release batch on table validation from abort path
  fbmon: prevent division by zero in fb_videomode_from_videomode()
  drivers/nvme: Add quirks for device 126f:2262
  fbdev: viafb: fix typo in hw_bitblt_1 and hw_bitblt_2
  usb: sl811-hcd: only defined function checkdone if QUIRK2 is defined
  usb: typec: tcpci: add generic tcpci fallback compatible
  tools: iio: replace seekdir() in iio_generic_buffer
  ring-buffer: use READ_ONCE() to read cpu_buffer->commit_page in concurrent environment
  ktest: force $buildonly = 1 for 'make_warnings_file' test type
  platform/x86: touchscreen_dmi: Add an extra entry for a variant of the Chuwi Vi8 tablet
  Input: allocate keycode for Display refresh rate toggle
  RDMA/cm: add timeout to cm_destroy_id wait
  block: prevent division by zero in blk_rq_stat_sum()
  libperf evlist: Avoid out-of-bounds access
  Revert "ACPI: PM: Block ASUS B1400CEAE from suspend to idle by default"
  SUNRPC: increase size of rpc_wait_queue.qlen from unsigned short to unsigned int
  drm/amd/display: Fix nanosec stat overflow
  ext4: forbid commit inconsistent quota data when errors=remount-ro
  ext4: add a hint for block bitmap corrupt state in mb_groups
  media: sta2x11: fix irq handler cast
  isofs: handle CDs with bad root inode but good Joliet root directory
  scsi: lpfc: Fix possible memory leak in lpfc_rcv_padisc()
  sysv: don't call sb_bread() with pointers_lock held
  pinctrl: renesas: checker: Limit cfg reg enum checks to provided IDs
  Input: synaptics-rmi4 - fail probing if memory allocation for "phys" fails
  Bluetooth: btintel: Fix null ptr deref in btintel_read_version
  net/smc: reduce rtnl pressure in smc_pnet_create_pnetids_list()
  btrfs: send: handle path ref underflow in header iterate_inode_ref()
  btrfs: export: handle invalid inode or root reference in btrfs_get_parent()
  btrfs: handle chunk tree lookup error in btrfs_relocate_sys_chunks()
  tools/power x86_energy_perf_policy: Fix file leak in get_pkg_num()
  pstore/zone: Add a null pointer check to the psz_kmsg_read
  ionic: set adminq irq affinity
  arm64: dts: rockchip: fix rk3399 hdmi ports node
  arm64: dts: rockchip: fix rk3328 hdmi ports node
  panic: Flush kernel log buffer at the end
  VMCI: Fix memcpy() run-time warning in dg_dispatch_as_host()
  wifi: ath9k: fix LNA selection in ath_ant_try_scan()
  objtool: Add asm version of STACK_FRAME_NON_STANDARD
  x86/cpufeatures: Add CPUID_LNX_5 to track recently added Linux-defined word
  mptcp: don't account accept() of non-MPC client as fallback to TCP
  x86/retpoline: Do the necessary fixup to the Zen3/4 srso return thunk for !SRSO
  x86/bugs: Fix the SRSO mitigation on Zen3/4
  riscv: Fix spurious errors from __get/put_kernel_nofault
  s390/entry: align system call table on 8 bytes
  x86/mce: Make sure to grab mce_sysfs_mutex in set_bank()
  of: dynamic: Synchronize of_changeset_destroy() with the devlink removals
  driver core: Introduce device_link_wait_removal()
  ALSA: hda/realtek: Update Panasonic CF-SZ6 quirk to support headset with microphone
  ata: sata_mv: Fix PCI device ID table declaration compilation warning
  scsi: mylex: Fix sysfs buffer lengths
  ata: sata_sx4: fix pdc20621_get_from_dimm() on 64-bit
  ASoC: ops: Fix wraparound for mask in snd_soc_get_volsw
  arm64: dts: qcom: sc7180-trogdor: mark bluetooth address as broken
  arm64: dts: qcom: sc7180: Remove clock for bluetooth on Trogdor
  net: ravb: Always process TX descriptor ring
  udp: do not accept non-tunnel GSO skbs landing in a tunnel
  Revert "usb: phy: generic: Get the vbus supply"
  scsi: qla2xxx: Update manufacturer detail
  scsi: qla2xxx: Update manufacturer details
  i40e: fix vf may be used uninitialized in this function warning
  i40e: fix i40e_count_filters() to count only active/new filters
  octeontx2-pf: check negative error code in otx2_open()
  udp: do not transition UDP GRO fraglist partial checksums to unnecessary
  ipv6: Fix infinite recursion in fib6_dump_done().
  selftests: reuseaddr_conflict: add missing new line at the end of the output
  erspan: make sure erspan_base_hdr is present in skb->head
  net: stmmac: fix rx queue priority assignment
  net/sched: act_skbmod: prevent kernel-infoleak
  bpf, sockmap: Prevent lock inversion deadlock in map delete elem
  vboxsf: Avoid an spurious warning if load_nls_xxx() fails
  netfilter: validate user input for expected length
  netfilter: nf_tables: Fix potential data-race in __nft_flowtable_type_get()
  netfilter: nf_tables: flush pending destroy work before exit_net release
  netfilter: nf_tables: reject new basechain after table flag update
  block: add check that partition length needs to be aligned with block size
  x86/srso: Add SRSO mitigation for Hygon processors
  mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations
  Revert "x86/mm/ident_map: Use gbpages only where full GB page should be mapped."
  io_uring: ensure '0' is returned on file registration success
  vfio/fsl-mc: Block calling interrupt handler without trigger
  vfio/platform: Create persistent IRQ handlers
  vfio/pci: Create persistent INTx handler
  vfio: Introduce interface to flush virqfd inject workqueue
  vfio/pci: Lock external INTx masking ops
  vfio/pci: Disable auto-enable of exclusive INTx IRQ
  net/rds: fix possible cp null dereference
  netfilter: nf_tables: disallow timeout for anonymous sets
  Bluetooth: Fix TOCTOU in HCI debugfs implementation
  Bluetooth: hci_event: set the conn encrypted before conn establishes
  x86/cpufeatures: Add new word for scattered features
  r8169: fix issue caused by buggy BIOS on certain boards with RTL8168d
  dm integrity: fix out-of-range warning
  Octeontx2-af: fix pause frame configuration in GMP mode
  bpf: Protect against int overflow for stack access size
  ACPICA: debugger: check status of acpi_evaluate_object() in acpi_db_walk_for_fields()
  tcp: properly terminate timers for kernel sockets
  ixgbe: avoid sleeping allocation in ixgbe_ipsec_vf_add_sa()
  nfc: nci: Fix uninit-value in nci_dev_up and nci_ntf_packet
  USB: core: Fix deadlock in usb_deauthorize_interface()
  scsi: lpfc: Correct size for wqe for memset()
  PCI/DPC: Quirk PIO log size for Intel Ice Lake Root Ports
  x86/cpu: Enable STIBP on AMD if Automatic IBRS is enabled
  scsi: qla2xxx: Delay I/O Abort on PCI error
  scsi: qla2xxx: Fix command flush on cable pull
  scsi: qla2xxx: Split FCE|EFT trace control
  usb: typec: ucsi: Clear UCSI_CCI_RESET_COMPLETE before reset
  usb: typec: ucsi: Ack unsupported commands
  usb: udc: remove warning when queue disabled ep
  usb: dwc2: gadget: LPM flow fix
  usb: dwc2: host: Fix ISOC flow in DDMA mode
  usb: dwc2: host: Fix hibernation flow
  usb: dwc2: host: Fix remote wakeup from hibernation
  USB: core: Add hub_get() and hub_put() routines
  staging: vc04_services: fix information leak in create_component()
  staging: vc04_services: changen strncpy() to strscpy_pad()
  scsi: core: Fix unremoved procfs host directory regression
  ALSA: sh: aica: reorder cleanup operations to avoid UAF bugs
  usb: cdc-wdm: close race between read and workqueue
  net: ll_temac: platform_get_resource replaced by wrong function
  mmc: core: Avoid negative index with array access
  mmc: core: Initialize mmc_blk_ioc_data
  hexagon: vmlinux.lds.S: handle attributes section
  exec: Fix NOMMU linux_binprm::exec in transfer_args_to_stack()
  wifi: mac80211: check/clear fast rx for non-4addr sta VLAN changes
  init: open /initrd.image with O_LARGEFILE
  mm/migrate: set swap entry values of THP tail pages properly.
  mm/memory-failure: fix an incorrect use of tail pages
  serial: sc16is7xx: convert from _raw_ to _noinc_ regmap functions for FIFO
  powerpc: xor_vmx: Add '-mhard-float' to CFLAGS
  efivarfs: Request at most 512 bytes for variable names
  perf/core: Fix reentry problem in perf_output_read_group()
  KVM/x86: Export RFDS_NO and RFDS_CLEAR to guests
  x86/rfds: Mitigate Register File Data Sampling (RFDS)
  Documentation/hw-vuln: Add documentation for RFDS
  x86/mmio: Disable KVM mitigation when X86_FEATURE_CLEAR_CPU_BUF is set
  KVM/VMX: Move VERW closer to VMentry for MDS mitigation
  KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
  x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
  x86/entry_32: Add VERW just before userspace transition
  x86/entry_64: Add VERW just before userspace transition
  x86/bugs: Add asm helpers for executing VERW
  x86/asm: Add _ASM_RIP() macro for x86-64 (%rip) suffix
  btrfs: allocate btrfs_ioctl_defrag_range_args on stack
  printk: Update @console_may_schedule in console_trylock_spinning()
  xen/events: close evtchn after mapping cleanup
  tee: optee: Fix kernel panic caused by incorrect error handling
  fs/aio: Check IOCB_AIO_RW before the struct aio_kiocb conversion
  vt: fix unicode buffer corruption when deleting characters
  mei: me: add arrow lake point H DID
  mei: me: add arrow lake point S DID
  tty: serial: fsl_lpuart: avoid idle preamble pending if CTS is enabled
  usb: port: Don't try to peer unused USB ports based on location
  usb: gadget: ncm: Fix handling of zero block length packets
  USB: usb-storage: Prevent divide-by-0 error in isd200_ata_command
  ALSA: hda/realtek - Fix headset Mic no show at resume back for Lenovo ALC897 platform
  KVM: SVM: Flush pages under kvm->lock to fix UAF in svm_register_enc_region()
  xfrm: Avoid clang fortify warning in copy_to_user_tmpl()
  Drivers: hv: vmbus: Calculate ring buffer size for more efficient use of memory
  netfilter: nf_tables: reject constant set with timeout
  netfilter: nf_tables: disallow anonymous set with timeout flag
  netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout
  cpufreq: brcmstb-avs-cpufreq: fix up "add check for cpufreq_cpu_get's return value"
  comedi: comedi_test: Prevent timers rescheduling during deletion
  scripts: kernel-doc: Fix syntax error due to undeclared args variable
  x86/pm: Work around false positive kmemleak report in msr_build_context()
  x86/stackprotector/32: Make the canary into a regular percpu variable
  vxge: remove unnecessary cast in kfree()
  dm snapshot: fix lockup in dm_exception_table_exit
  drm/amd/display: Fix noise issue on HDMI AV mute
  drm/amd/display: Return the correct HDCP error code
  ahci: asm1064: asm1166: don't limit reported ports
  ahci: asm1064: correct count of reported ports
  wireguard: netlink: access device through ctx instead of peer
  wireguard: netlink: check for dangling peer via is_dead instead of empty list
  net: hns3: tracing: fix hclgevf trace event strings
  x86/CPU/AMD: Update the Zenbleed microcode revisions
  cpufreq: dt: always allocate zeroed cpumask
  nilfs2: prevent kernel bug at submit_bh_wbc()
  nilfs2: fix failure to detect DAT corruption in btree and direct mappings
  memtest: use {READ,WRITE}_ONCE in memory scanning
  drm/vc4: hdmi: do not return negative values from .get_modes()
  drm/imx/ipuv3: do not return negative values from .get_modes()
  drm/exynos: do not return negative values from .get_modes()
  drm/panel: do not return negative error codes from drm_panel_get_modes()
  s390/zcrypt: fix reference counting on zcrypt card objects
  soc: fsl: qbman: Use raw spinlock for cgr_lock
  soc: fsl: qbman: Add CGR update function
  soc: fsl: qbman: Add helper for sanity checking cgr ops
  soc: fsl: qbman: Always disable interrupts when taking cgr_lock
  ring-buffer: Fix full_waiters_pending in poll
  ring-buffer: Fix resetting of shortest_full
  ring-buffer: Do not set shortest_full when full target is hit
  ring-buffer: Fix waking up ring buffer readers
  vfio/platform: Disable virqfds on cleanup
  PCI: dwc: endpoint: Fix advertised resizable BAR size
  kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1
  nfs: fix UAF in direct writes
  PCI/AER: Block runtime suspend when handling errors
  PCI/ERR: Clear AER status only when we control AER
  speakup: Fix 8bit characters from direct synth
  usb: gadget: tegra-xudc: Fix USB3 PHY retrieval logic
  usb: gadget: tegra-xudc: Use dev_err_probe()
  phy: tegra: xusb: Add API to retrieve the port number of phy
  slimbus: core: Remove usage of the deprecated ida_simple_xx() API
  nvmem: meson-efuse: fix function pointer type mismatch
  ext4: fix corruption during on-line resize
  hwmon: (amc6821) add of_match table
  drm/etnaviv: Restore some id values
  mmc: core: Fix switch on gp3 partition
  mm: swap: fix race between free_swap_and_cache() and swapoff()
  mac802154: fix llsec key resources release in mac802154_llsec_key_del
  dm-raid: fix lockdep waring in "pers->hot_add_disk"
  Revert "Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d""
  PCI/DPC: Quirk PIO log size for Intel Raptor Lake Root Ports
  PCI/DPC: Quirk PIO log size for certain Intel Root Ports
  PCI/ASPM: Make Intel DG2 L1 acceptable latency unlimited
  PCI: Work around Intel I210 ROM BAR overlap defect
  PCI: Cache PCIe Device Capabilities register
  PCI/ERR: Cache RCEC EA Capability offset in pci_init_capabilities()
  PCI/PM: Drain runtime-idle callbacks before driver removal
  PCI: Drop pci_device_remove() test of pci_dev->driver
  btrfs: fix off-by-one chunk length calculation at contains_pending_extent()
  serial: Lock console when calling into driver before registration
  printk/console: Split out code that enables default console
  usb: typec: ucsi: Clean up UCSI_CABLE_PROP macros
  fuse: don't unhash root
  fuse: fix root lookup with nonzero generation
  mmc: tmio: avoid concurrent runs of mmc_request_done()
  PM: sleep: wakeirq: fix wake irq warning in system suspend
  USB: serial: cp210x: add pid/vid for TDK NC0110013M and MM0110113M
  USB: serial: option: add MeiG Smart SLM320 product
  USB: serial: cp210x: add ID for MGP Instruments PDS100
  USB: serial: add device ID for VeriFone adapter
  USB: serial: ftdi_sio: add support for GMC Z216C Adapter IR-USB
  powerpc/fsl: Fix mfpmr build errors with newer binutils
  clk: qcom: mmcc-msm8974: fix terminating of frequency table arrays
  clk: qcom: mmcc-apq8084: fix terminating of frequency table arrays
  clk: qcom: gcc-ipq8074: fix terminating of frequency table arrays
  clk: qcom: gcc-ipq6018: fix terminating of frequency table arrays
  PM: suspend: Set mem_sleep_current during kernel command line setup
  parisc: Strip upper 32 bit of sum in csum_ipv6_magic for 64-bit builds
  parisc: Fix csum_ipv6_magic on 64-bit systems
  parisc: Fix csum_ipv6_magic on 32-bit systems
  parisc: Fix ip_fast_csum
  parisc: Avoid clobbering the C/B bits in the PSW with tophys and tovirt macros
  mtd: rawnand: meson: fix scrambling mode value in command macro
  ubi: correct the calculation of fastmap size
  ubi: Check for too small LEB size in VTBL code
  ubifs: Set page uptodate in the correct place
  fat: fix uninitialized field in nostale filehandles
  bounds: support non-power-of-two CONFIG_NR_CPUS
  block: Clear zone limits for a non-zoned stacked queue
  block: introduce zone_write_granularity limit
  ext4: correct best extent lstart adjustment logic
  selftests/mqueue: Set timeout to 180 seconds
  crypto: qat - resolve race condition during AER recovery
  crypto: qat - fix double free during reset
  sparc: vDSO: fix return value of __setup handler
  sparc64: NMI watchdog: fix return value of __setup handler
  KVM: Always flush async #PF workqueue when vCPU is being destroyed
  media: xc4000: Fix atomicity violation in xc4000_get_frequency
  serial: max310x: fix NULL pointer dereference in I2C instantiation
  drm/vmwgfx: Fix possible null pointer derefence with invalid contexts
  drm/vmwgfx: Fix some static checker warnings
  drm/vmwgfx/vmwgfx_cmdbuf_res: Remove unused variable 'ret'
  drm/vmwgfx: switch over to the new pin interface v2
  drm/vmwgfx: stop using ttm_bo_create v2
  arm: dts: marvell: Fix maxium->maxim typo in brownstone dts
  smack: Handle SMACK64TRANSMUTE in smack_inode_setsecurity()
  smack: Set SMACK64TRANSMUTE only for dirs in smack_inode_setxattr()
  clk: qcom: gcc-sdm845: Add soft dependency on rpmhpd
  media: staging: ipu3-imgu: Set fields before media_entity_pads_init()
  wifi: brcmfmac: Fix use-after-free bug in brcmf_cfg80211_detach
  timers: Rename del_timer_sync() to timer_delete_sync()
  timers: Use del_timer_sync() even on UP
  timers: Update kernel-doc for various functions
  x86/bugs: Use sysfs_emit()
  x86/cpu: Support AMD Automatic IBRS
  Documentation/hw-vuln: Update spectre doc
  amdkfd: use calloc instead of kzalloc to avoid integer overflow

Change-Id: I7279a2f07527db00e298b47f8f8f44c457fa2ef6
This commit is contained in:
Michael Bestas 2024-08-15 22:14:09 +03:00
commit d613c46b79
No known key found for this signature in database
GPG Key ID: CC95044519BE6669
563 changed files with 7624 additions and 5286 deletions

View File

@ -66,6 +66,9 @@ Description:
echo 0 > /sys/class/devfreq/.../trans_stat
If the transition table is bigger than PAGE_SIZE, reading
this will return an -EFBIG error.
What: /sys/class/devfreq/.../userspace/set_freq
Date: September 2011
Contact: MyungJoo Ham <myungjoo.ham@samsung.com>

View File

@ -516,6 +516,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/mds
/sys/devices/system/cpu/vulnerabilities/meltdown
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
/sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
/sys/devices/system/cpu/vulnerabilities/retbleed
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/spectre_v1

View File

@ -18,3 +18,4 @@ are configurable at compile, boot or run time.
processor_mmio_stale_data.rst
gather_data_sampling.rst
srso
reg-file-data-sampling

View File

@ -0,0 +1,104 @@
==================================
Register File Data Sampling (RFDS)
==================================
Register File Data Sampling (RFDS) is a microarchitectural vulnerability that
only affects Intel Atom parts(also branded as E-cores). RFDS may allow
a malicious actor to infer data values previously used in floating point
registers, vector registers, or integer registers. RFDS does not provide the
ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS.
Affected Processors
===================
Below is the list of affected Intel processors [#f1]_:
=================== ============
Common name Family_Model
=================== ============
ATOM_GOLDMONT 06_5CH
ATOM_GOLDMONT_D 06_5FH
ATOM_GOLDMONT_PLUS 06_7AH
ATOM_TREMONT_D 06_86H
ATOM_TREMONT 06_96H
ALDERLAKE 06_97H
ALDERLAKE_L 06_9AH
ATOM_TREMONT_L 06_9CH
RAPTORLAKE 06_B7H
RAPTORLAKE_P 06_BAH
ALDERLAKE_N 06_BEH
RAPTORLAKE_S 06_BFH
=================== ============
As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and
RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as
vulnerable in Linux because they share the same family/model with an affected
part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or
CPUID.HYBRID. This information could be used to distinguish between the
affected and unaffected parts, but it is deemed not worth adding complexity as
the reporting is fixed automatically when these parts enumerate RFDS_NO.
Mitigation
==========
Intel released a microcode update that enables software to clear sensitive
information using the VERW instruction. Like MDS, RFDS deploys the same
mitigation strategy to force the CPU to clear the affected buffers before an
attacker can extract the secrets. This is achieved by using the otherwise
unused and obsolete VERW instruction in combination with a microcode update.
The microcode clears the affected CPU buffers when the VERW instruction is
executed.
Mitigation points
-----------------
VERW is executed by the kernel before returning to user space, and by KVM
before VMentry. None of the affected cores support SMT, so VERW is not required
at C-state transitions.
New bits in IA32_ARCH_CAPABILITIES
----------------------------------
Newer processors and microcode update on existing affected processors added new
bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
vulnerability and mitigation capability:
- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS.
- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the
microcode that clears the affected buffers on VERW execution.
Mitigation control on the kernel command line
---------------------------------------------
The kernel command line allows to control RFDS mitigation at boot time with the
parameter "reg_file_data_sampling=". The valid arguments are:
========== =================================================================
on If the CPU is vulnerable, enable mitigation; CPU buffer clearing
on exit to userspace and before entering a VM.
off Disables mitigation.
========== =================================================================
Mitigation default is selected by CONFIG_MITIGATION_RFDS.
Mitigation status information
-----------------------------
The Linux kernel provides a sysfs interface to enumerate the current
vulnerability status of the system: whether the system is vulnerable, and
which mitigations are active. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
The possible values in this file are:
.. list-table::
* - 'Not affected'
- The processor is not vulnerable
* - 'Vulnerable'
- The processor is vulnerable, but no mitigation enabled
* - 'Vulnerable: No microcode'
- The processor is vulnerable but microcode is not updated.
* - 'Mitigation: Clear Register File'
- The processor is vulnerable and the CPU buffer clearing mitigation is
enabled.
References
----------
.. [#f1] Affected Processors
https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html

View File

@ -484,11 +484,14 @@ Spectre variant 2
Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
boot, by setting the IBRS bit, and they're automatically protected against
Spectre v2 variant attacks, including cross-thread branch target injections
on SMT systems (STIBP). In other words, eIBRS enables STIBP too.
Spectre v2 variant attacks.
Legacy IBRS systems clear the IBRS bit on exit to userspace and
therefore explicitly enable STIBP for that
On Intel's enhanced IBRS systems, this includes cross-thread branch target
injections on SMT systems (STIBP). In other words, Intel eIBRS enables
STIBP, too.
AMD Automatic IBRS does not protect userspace, and Legacy IBRS systems clear
the IBRS bit on exit to userspace, therefore both explicitly enable STIBP.
The retpoline mitigation is turned on by default on vulnerable
CPUs. It can be forced on or off by the administrator
@ -622,9 +625,10 @@ kernel command line.
retpoline,generic Retpolines
retpoline,lfence LFENCE; indirect branch
retpoline,amd alias for retpoline,lfence
eibrs enhanced IBRS
eibrs,retpoline enhanced IBRS + Retpolines
eibrs,lfence enhanced IBRS + LFENCE
eibrs Enhanced/Auto IBRS
eibrs,retpoline Enhanced/Auto IBRS + Retpolines
eibrs,lfence Enhanced/Auto IBRS + LFENCE
ibrs use IBRS to protect kernel
Not specifying this option is equivalent to
spectre_v2=auto.

View File

@ -1029,6 +1029,26 @@
The filter can be disabled or changed to another
driver later using sysfs.
reg_file_data_sampling=
[X86] Controls mitigation for Register File Data
Sampling (RFDS) vulnerability. RFDS is a CPU
vulnerability which may allow userspace to infer
kernel data values previously stored in floating point
registers, vector registers, or integer registers.
RFDS only affects Intel Atom processors.
on: Turns ON the mitigation.
off: Turns OFF the mitigation.
This parameter overrides the compile time default set
by CONFIG_MITIGATION_RFDS. Mitigation cannot be
disabled when other VERW based mitigations (like MDS)
are enabled. In order to disable RFDS mitigation all
VERW based mitigations need to be disabled.
For details see:
Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
driver_async_probe= [KNL]
List of driver names to be probed asynchronously.
Format: <driver_name1>,<driver_name2>...
@ -2985,6 +3005,7 @@
nopti [X86,PPC]
nospectre_v1 [X86,PPC]
nospectre_v2 [X86,PPC,S390,ARM64]
reg_file_data_sampling=off [X86]
retbleed=off [X86]
spec_store_bypass_disable=off [X86,PPC]
spectre_v2_user=off [X86]
@ -5197,9 +5218,9 @@
retpoline,generic - Retpolines
retpoline,lfence - LFENCE; indirect branch
retpoline,amd - alias for retpoline,lfence
eibrs - enhanced IBRS
eibrs,retpoline - enhanced IBRS + Retpolines
eibrs,lfence - enhanced IBRS + LFENCE
eibrs - Enhanced/Auto IBRS
eibrs,retpoline - Enhanced/Auto IBRS + Retpolines
eibrs,lfence - Enhanced/Auto IBRS + LFENCE
ibrs - use IBRS to protect kernel
Not specifying this option is equivalent to

View File

@ -94,7 +94,6 @@ class KernelInclude(Include):
# HINT: this is the only line I had to change / commented out:
#path = utils.relative_path(None, path)
path = nodes.reprunicode(path)
encoding = self.options.get(
'encoding', self.state.document.settings.input_encoding)
e_handler=self.state.document.settings.input_encoding_error_handler

View File

@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing:
mds_clear_cpu_buffers()
Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
Other than CFLAGS.ZF, this macro doesn't clobber any registers.
The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
(idle) transitions.
@ -138,17 +141,30 @@ Mitigation points
When transitioning from kernel to user space the CPU buffers are flushed
on affected CPUs when the mitigation is not disabled on the kernel
command line. The migitation is enabled through the static key
mds_user_clear.
command line. The mitigation is enabled through the feature flag
X86_FEATURE_CLEAR_CPU_BUF.
The mitigation is invoked in prepare_exit_to_usermode() which covers
all but one of the kernel to user space transitions. The exception
is when we return from a Non Maskable Interrupt (NMI), which is
handled directly in do_nmi().
The mitigation is invoked just before transitioning to userspace after
user registers are restored. This is done to minimize the window in
which kernel data could be accessed after VERW e.g. via an NMI after
VERW.
(The reason that NMI is special is that prepare_exit_to_usermode() can
enable IRQs. In NMI context, NMIs are blocked, and we don't want to
enable IRQs with NMIs blocked.)
**Corner case not handled**
Interrupts returning to kernel don't clear CPUs buffers since the
exit-to-user path is expected to do that anyways. But, there could be
a case when an NMI is generated in kernel after the exit-to-user path
has cleared the buffers. This case is not handled and NMI returning to
kernel don't clear CPU buffers because:
1. It is rare to get an NMI after VERW, but before returning to userspace.
2. For an unprivileged user, there is no known way to make that NMI
less rare or target it.
3. It would take a large number of these precisely-timed NMIs to mount
an actual attack. There's presumably not enough bandwidth.
4. The NMI in question occurs after a VERW, i.e. when user state is
restored and most interesting data is already scrubbed. Whats left
is only the data that NMI touches, and that may or may not be of
any interest.
2. C-State transition

View File

@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 10
SUBLEVEL = 214
SUBLEVEL = 218
EXTRAVERSION =
NAME = Dare mighty things

File diff suppressed because it is too large Load Diff

View File

@ -9,6 +9,14 @@
#
source "arch/$(SRCARCH)/Kconfig"
config ARCH_CONFIGURES_CPU_MITIGATIONS
bool
if !ARCH_CONFIGURES_CPU_MITIGATIONS
config CPU_MITIGATIONS
def_bool y
endif
menu "General architecture-dependent options"
config CRASH_CORE

View File

@ -205,7 +205,6 @@ dmac_cfg_clk: dmac-gpu-cfg-clk {
};
gmac: ethernet@8000 {
#interrupt-cells = <1>;
compatible = "snps,dwmac";
reg = <0x8000 0x2000>;
interrupts = <10>;

View File

@ -28,7 +28,7 @@ &uart3 {
&twsi1 {
status = "okay";
pmic: max8925@3c {
compatible = "maxium,max8925";
compatible = "maxim,max8925";
reg = <0x3c>;
interrupts = <1>;
interrupt-parent = <&intcmux4>;

View File

@ -127,7 +127,7 @@ ethernet_phy0: ethernet-phy@5 {
};
&pio {
eth_default: eth_default {
eth_default: eth-default-pins {
tx_pins {
pinmux = <MT2712_PIN_71_GBE_TXD3__FUNC_GBE_TXD3>,
<MT2712_PIN_72_GBE_TXD2__FUNC_GBE_TXD2>,
@ -154,7 +154,7 @@ mdio_pins {
};
};
eth_sleep: eth_sleep {
eth_sleep: eth-sleep-pins {
tx_pins {
pinmux = <MT2712_PIN_71_GBE_TXD3__FUNC_GPIO71>,
<MT2712_PIN_72_GBE_TXD2__FUNC_GPIO72>,
@ -180,14 +180,14 @@ mdio_pins {
};
};
usb0_id_pins_float: usb0_iddig {
usb0_id_pins_float: usb0-iddig-pins {
pins_iddig {
pinmux = <MT2712_PIN_12_IDDIG_P0__FUNC_IDDIG_A>;
bias-pull-up;
};
};
usb1_id_pins_float: usb1_iddig {
usb1_id_pins_float: usb1-iddig-pins {
pins_iddig {
pinmux = <MT2712_PIN_14_IDDIG_P1__FUNC_IDDIG_B>;
bias-pull-up;

View File

@ -249,10 +249,11 @@ topckgen: syscon@10000000 {
#clock-cells = <1>;
};
infracfg: syscon@10001000 {
infracfg: clock-controller@10001000 {
compatible = "mediatek,mt2712-infracfg", "syscon";
reg = <0 0x10001000 0 0x1000>;
#clock-cells = <1>;
#reset-cells = <1>;
};
pericfg: syscon@10003000 {

View File

@ -244,7 +244,7 @@ scpsys: power-controller@10006000 {
clock-names = "hif_sel";
};
cir: cir@10009000 {
cir: ir-receiver@10009000 {
compatible = "mediatek,mt7622-cir";
reg = <0 0x10009000 0 0x1000>;
interrupts = <GIC_SPI 175 IRQ_TYPE_LEVEL_LOW>;
@ -275,16 +275,14 @@ thermal_calibration: calib@198 {
};
};
apmixedsys: apmixedsys@10209000 {
compatible = "mediatek,mt7622-apmixedsys",
"syscon";
apmixedsys: clock-controller@10209000 {
compatible = "mediatek,mt7622-apmixedsys";
reg = <0 0x10209000 0 0x1000>;
#clock-cells = <1>;
};
topckgen: topckgen@10210000 {
compatible = "mediatek,mt7622-topckgen",
"syscon";
topckgen: clock-controller@10210000 {
compatible = "mediatek,mt7622-topckgen";
reg = <0 0x10210000 0 0x1000>;
#clock-cells = <1>;
};
@ -357,7 +355,7 @@ cci_control1: slave-if@4000 {
};
cci_control2: slave-if@5000 {
compatible = "arm,cci-400-ctrl-if";
compatible = "arm,cci-400-ctrl-if", "syscon";
interface-type = "ace";
reg = <0x5000 0x1000>;
};
@ -507,7 +505,6 @@ thermal: thermal@1100b000 {
<&pericfg CLK_PERI_AUXADC_PD>;
clock-names = "therm", "auxadc";
resets = <&pericfg MT7622_PERI_THERM_SW_RST>;
reset-names = "therm";
mediatek,auxadc = <&auxadc>;
mediatek,apmixedsys = <&apmixedsys>;
nvmem-cells = <&thermal_calibration>;
@ -715,9 +712,8 @@ wmac: wmac@18000000 {
power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>;
};
ssusbsys: ssusbsys@1a000000 {
compatible = "mediatek,mt7622-ssusbsys",
"syscon";
ssusbsys: clock-controller@1a000000 {
compatible = "mediatek,mt7622-ssusbsys";
reg = <0 0x1a000000 0 0x1000>;
#clock-cells = <1>;
#reset-cells = <1>;
@ -774,9 +770,8 @@ u2port1: usb-phy@1a0c5000 {
};
};
pciesys: pciesys@1a100800 {
compatible = "mediatek,mt7622-pciesys",
"syscon";
pciesys: clock-controller@1a100800 {
compatible = "mediatek,mt7622-pciesys";
reg = <0 0x1a100800 0 0x1000>;
#clock-cells = <1>;
#reset-cells = <1>;
@ -893,7 +888,13 @@ sata_port: sata-phy@1a243000 {
};
};
ethsys: syscon@1b000000 {
hifsys: clock-controller@1af00000 {
compatible = "mediatek,mt7622-hifsys";
reg = <0 0x1af00000 0 0x70>;
#clock-cells = <1>;
};
ethsys: clock-controller@1b000000 {
compatible = "mediatek,mt7622-ethsys",
"syscon";
reg = <0 0x1b000000 0 0x1000>;
@ -911,10 +912,28 @@ hsdma: dma-controller@1b007000 {
#dma-cells = <1>;
};
eth: ethernet@1b100000 {
compatible = "mediatek,mt7622-eth",
"mediatek,mt2701-eth",
pcie_mirror: pcie-mirror@10000400 {
compatible = "mediatek,mt7622-pcie-mirror",
"syscon";
reg = <0 0x10000400 0 0x10>;
};
wed0: wed@1020a000 {
compatible = "mediatek,mt7622-wed",
"syscon";
reg = <0 0x1020a000 0 0x1000>;
interrupts = <GIC_SPI 214 IRQ_TYPE_LEVEL_LOW>;
};
wed1: wed@1020b000 {
compatible = "mediatek,mt7622-wed",
"syscon";
reg = <0 0x1020b000 0 0x1000>;
interrupts = <GIC_SPI 215 IRQ_TYPE_LEVEL_LOW>;
};
eth: ethernet@1b100000 {
compatible = "mediatek,mt7622-eth";
reg = <0 0x1b100000 0 0x20000>;
interrupts = <GIC_SPI 223 IRQ_TYPE_LEVEL_LOW>,
<GIC_SPI 224 IRQ_TYPE_LEVEL_LOW>,
@ -937,6 +956,11 @@ eth: ethernet@1b100000 {
power-domains = <&scpsys MT7622_POWER_DOMAIN_ETHSYS>;
mediatek,ethsys = <&ethsys>;
mediatek,sgmiisys = <&sgmiisys>;
mediatek,cci-control = <&cci_control2>;
mediatek,wed = <&wed0>, <&wed1>;
mediatek,pcie-mirror = <&pcie_mirror>;
mediatek,hifsys = <&hifsys>;
dma-coherent;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";

View File

@ -949,10 +949,10 @@ pcie0: pci@1c00000 {
interrupts = <GIC_SPI 405 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "msi";
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 135 IRQ_TYPE_LEVEL_HIGH>,
<0 0 0 2 &intc 0 136 IRQ_TYPE_LEVEL_HIGH>,
<0 0 0 3 &intc 0 138 IRQ_TYPE_LEVEL_HIGH>,
<0 0 0 4 &intc 0 139 IRQ_TYPE_LEVEL_HIGH>;
interrupt-map = <0 0 0 1 &intc 0 0 135 IRQ_TYPE_LEVEL_HIGH>,
<0 0 0 2 &intc 0 0 136 IRQ_TYPE_LEVEL_HIGH>,
<0 0 0 3 &intc 0 0 138 IRQ_TYPE_LEVEL_HIGH>,
<0 0 0 4 &intc 0 0 139 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_PCIE_0_PIPE_CLK>,
<&gcc GCC_PCIE_0_MSTR_AXI_CLK>,

View File

@ -810,7 +810,8 @@ bluetooth: bluetooth {
vddrf-supply = <&pp1300_l2c>;
vddch0-supply = <&pp3300_l10c>;
max-speed = <3200000>;
clocks = <&rpmhcc RPMH_RF_CLK2>;
qcom,local-bd-address-broken;
};
};

View File

@ -1824,10 +1824,10 @@ pcie0: pci@1c00000 {
interrupt-names = "msi";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
<0 0 0 2 &intc 0 150 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
<0 0 0 3 &intc 0 151 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
<0 0 0 4 &intc 0 152 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
interrupt-map = <0 0 0 1 &intc 0 0 0 149 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
<0 0 0 2 &intc 0 0 0 150 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
<0 0 0 3 &intc 0 0 0 151 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
<0 0 0 4 &intc 0 0 0 152 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
clocks = <&gcc GCC_PCIE_0_PIPE_CLK>,
<&gcc GCC_PCIE_0_AUX_CLK>,
@ -1928,10 +1928,10 @@ pcie1: pci@1c08000 {
interrupt-names = "msi";
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 0x7>;
interrupt-map = <0 0 0 1 &intc 0 434 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
<0 0 0 2 &intc 0 435 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
<0 0 0 3 &intc 0 438 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
<0 0 0 4 &intc 0 439 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
interrupt-map = <0 0 0 1 &intc 0 0 0 434 IRQ_TYPE_LEVEL_HIGH>, /* int_a */
<0 0 0 2 &intc 0 0 0 435 IRQ_TYPE_LEVEL_HIGH>, /* int_b */
<0 0 0 3 &intc 0 0 0 438 IRQ_TYPE_LEVEL_HIGH>, /* int_c */
<0 0 0 4 &intc 0 0 0 439 IRQ_TYPE_LEVEL_HIGH>; /* int_d */
clocks = <&gcc GCC_PCIE_1_PIPE_CLK>,
<&gcc GCC_PCIE_1_AUX_CLK>,

View File

@ -732,11 +732,20 @@ hdmi: hdmi@ff3c0000 {
status = "disabled";
ports {
hdmi_in: port {
#address-cells = <1>;
#size-cells = <0>;
hdmi_in: port@0 {
reg = <0>;
hdmi_in_vop: endpoint {
remote-endpoint = <&vop_out_hdmi>;
};
};
hdmi_out: port@1 {
reg = <1>;
};
};
};

View File

@ -784,7 +784,6 @@ &pcie_phy {
};
&pcie0 {
bus-scan-delay-ms = <1000>;
ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>;
num-lanes = <4>;
pinctrl-names = "default";

View File

@ -430,16 +430,22 @@ &io_domains {
gpio1830-supply = <&vcc_1v8>;
};
&pmu_io_domains {
status = "okay";
pmu1830-supply = <&vcc_1v8>;
};
&pwm2 {
status = "okay";
&pcie_clkreqn_cpm {
rockchip,pins =
<2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>;
};
&pinctrl {
pinctrl-names = "default";
pinctrl-0 = <&q7_thermal_pin>;
gpios {
q7_thermal_pin: q7-thermal-pin {
rockchip,pins =
<0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
i2c8 {
i2c8_xfer_a: i2c8-xfer {
rockchip,pins =
@ -470,6 +476,15 @@ vcc5v0_host_en: vcc5v0-host-en {
};
};
&pmu_io_domains {
status = "okay";
pmu1830-supply = <&vcc_1v8>;
};
&pwm2 {
status = "okay";
};
&sdhci {
/*
* Signal integrity isn't great at 200MHz but 100MHz has proven stable

View File

@ -1769,6 +1769,7 @@ simple-audio-card,codec {
hdmi: hdmi@ff940000 {
compatible = "rockchip,rk3399-dw-hdmi";
reg = <0x0 0xff940000 0x0 0x20000>;
reg-io-width = <4>;
interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH 0>;
clocks = <&cru PCLK_HDMI_CTRL>,
<&cru SCLK_HDMI_SFR>,
@ -1777,13 +1778,16 @@ hdmi: hdmi@ff940000 {
<&cru PLL_VPLL>;
clock-names = "iahb", "isfr", "cec", "grf", "vpll";
power-domains = <&power RK3399_PD_HDCP>;
reg-io-width = <4>;
rockchip,grf = <&grf>;
#sound-dai-cells = <0>;
status = "disabled";
ports {
hdmi_in: port {
#address-cells = <1>;
#size-cells = <0>;
hdmi_in: port@0 {
reg = <0>;
#address-cells = <1>;
#size-cells = <0>;
@ -1796,6 +1800,10 @@ hdmi_in_vopl: endpoint@1 {
remote-endpoint = <&vopl_out_hdmi>;
};
};
hdmi_out: port@1 {
reg = <1>;
};
};
};

View File

@ -284,16 +284,12 @@ int kvm_register_vgic_device(unsigned long type)
int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
struct vgic_reg_attr *reg_attr)
{
int cpuid;
int cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr);
cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
KVM_DEV_ARM_VGIC_CPUID_SHIFT;
if (cpuid >= atomic_read(&dev->kvm->online_vcpus))
return -EINVAL;
reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid);
reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid);
if (!reg_attr->vcpu)
return -EINVAL;
return 0;
}

View File

@ -64,6 +64,7 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
ELF_DETAILS
.hexagon.attributes 0 : { *(.hexagon.attributes) }
DISCARDS
}

View File

@ -157,7 +157,7 @@ static inline long regs_return_value(struct pt_regs *regs)
#define instruction_pointer(regs) ((regs)->cp0_epc)
#define profile_pc(regs) instruction_pointer(regs)
extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
extern asmlinkage long syscall_trace_enter(struct pt_regs *regs);
extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
extern void die(const char *, struct pt_regs *) __noreturn;

View File

@ -100,6 +100,7 @@ void output_thread_info_defines(void)
OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit);
OFFSET(TI_REGS, thread_info, regs);
OFFSET(TI_SYSCALL, thread_info, syscall);
DEFINE(_THREAD_SIZE, THREAD_SIZE);
DEFINE(_THREAD_MASK, THREAD_MASK);
DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);

View File

@ -1310,16 +1310,13 @@ long arch_ptrace(struct task_struct *child, long request,
* Notification of system call entry/exit
* - triggered by current->work.syscall_trace
*/
asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
asmlinkage long syscall_trace_enter(struct pt_regs *regs)
{
user_exit();
current_thread_info()->syscall = syscall;
if (test_thread_flag(TIF_SYSCALL_TRACE)) {
if (tracehook_report_syscall_entry(regs))
return -1;
syscall = current_thread_info()->syscall;
}
#ifdef CONFIG_SECCOMP
@ -1328,7 +1325,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
struct seccomp_data sd;
unsigned long args[6];
sd.nr = syscall;
sd.nr = current_thread_info()->syscall;
sd.arch = syscall_get_arch(current);
syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
@ -1338,23 +1335,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
ret = __secure_computing(&sd);
if (ret == -1)
return ret;
syscall = current_thread_info()->syscall;
}
#endif
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->regs[2]);
audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
audit_syscall_entry(current_thread_info()->syscall,
regs->regs[4], regs->regs[5],
regs->regs[6], regs->regs[7]);
/*
* Negative syscall numbers are mistaken for rejected syscalls, but
* won't have had the return value set appropriately, so we do so now.
*/
if (syscall < 0)
if (current_thread_info()->syscall < 0)
syscall_set_return_value(current, regs, -ENOSYS, 0);
return syscall;
return current_thread_info()->syscall;
}
/*

View File

@ -80,6 +80,18 @@ loads_done:
PTR load_a7, bad_stack_a7
.previous
/*
* syscall number is in v0 unless we called syscall(__NR_###)
* where the real syscall number is in a0
*/
subu t2, v0, __NR_O32_Linux
bnez t2, 1f /* __NR_syscall at offset 0 */
LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
b 2f
1:
LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
2:
lw t0, TI_FLAGS($28) # syscall tracing enabled?
li t1, _TIF_WORK_SYSCALL_ENTRY
and t0, t1
@ -117,16 +129,7 @@ syscall_trace_entry:
SAVE_STATIC
move a0, sp
/*
* syscall number is in v0 unless we called syscall(__NR_###)
* where the real syscall number is in a0
*/
move a1, v0
subu t2, v0, __NR_O32_Linux
bnez t2, 1f /* __NR_syscall at offset 0 */
lw a1, PT_R4(sp)
1: jal syscall_trace_enter
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall

View File

@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
LONG_S v0, TI_SYSCALL($28) # Store syscall number
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@ -72,7 +74,6 @@ syscall_common:
n32_syscall_trace_entry:
SAVE_STATIC
move a0, sp
move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall

View File

@ -47,6 +47,8 @@ NESTED(handle_sys64, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
LONG_S v0, TI_SYSCALL($28) # Store syscall number
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@ -83,7 +85,6 @@ n64_syscall_exit:
syscall_trace_entry:
SAVE_STATIC
move a0, sp
move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall

View File

@ -79,6 +79,22 @@ loads_done:
PTR load_a7, bad_stack_a7
.previous
/*
* absolute syscall number is in v0 unless we called syscall(__NR_###)
* where the real syscall number is in a0
* note: NR_syscall is the first O32 syscall but the macro is
* only defined when compiling with -mabi=32 (CONFIG_32BIT)
* therefore __NR_O32_Linux is used (4000)
*/
subu t2, v0, __NR_O32_Linux
bnez t2, 1f /* __NR_syscall at offset 0 */
LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
b 2f
1:
LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
2:
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@ -113,22 +129,7 @@ trace_a_syscall:
sd a7, PT_R11(sp) # For indirect syscalls
move a0, sp
/*
* absolute syscall number is in v0 unless we called syscall(__NR_###)
* where the real syscall number is in a0
* note: NR_syscall is the first O32 syscall but the macro is
* only defined when compiling with -mabi=32 (CONFIG_32BIT)
* therefore __NR_O32_Linux is used (4000)
*/
.set push
.set reorder
subu t1, v0, __NR_O32_Linux
move a1, v0
bnez t1, 1f /* __NR_syscall at offset 0 */
ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
.set pop
1: jal syscall_trace_enter
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall

View File

@ -83,26 +83,28 @@
* version takes two arguments: a src and destination register.
* However, the source and destination registers can not be
* the same register.
*
* We use add,l to avoid clobbering the C/B bits in the PSW.
*/
.macro tophys grvirt, grphys
ldil L%(__PAGE_OFFSET), \grphys
sub \grvirt, \grphys, \grphys
ldil L%(-__PAGE_OFFSET), \grphys
addl \grvirt, \grphys, \grphys
.endm
.macro tovirt grphys, grvirt
ldil L%(__PAGE_OFFSET), \grvirt
add \grphys, \grvirt, \grvirt
addl \grphys, \grvirt, \grvirt
.endm
.macro tophys_r1 gr
ldil L%(__PAGE_OFFSET), %r1
sub \gr, %r1, \gr
ldil L%(-__PAGE_OFFSET), %r1
addl \gr, %r1, \gr
.endm
.macro tovirt_r1 gr
ldil L%(__PAGE_OFFSET), %r1
add \gr, %r1, \gr
addl \gr, %r1, \gr
.endm
.macro delay value

View File

@ -40,7 +40,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
" addc %0, %5, %0\n"
" addc %0, %3, %0\n"
"1: ldws,ma 4(%1), %3\n"
" addib,< 0, %2, 1b\n"
" addib,> -1, %2, 1b\n"
" addc %0, %3, %0\n"
"\n"
" extru %0, 31, 16, %4\n"
@ -126,6 +126,7 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
** Try to keep 4 registers with "live" values ahead of the ALU.
*/
" depdi 0, 31, 32, %0\n"/* clear upper half of incoming checksum */
" ldd,ma 8(%1), %4\n" /* get 1st saddr word */
" ldd,ma 8(%2), %5\n" /* get 1st daddr word */
" add %4, %0, %0\n"
@ -137,8 +138,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
" add,dc %3, %0, %0\n" /* fold in proto+len | carry bit */
" extrd,u %0, 31, 32, %4\n"/* copy upper half down */
" depdi 0, 31, 32, %0\n"/* clear upper half */
" add %4, %0, %0\n" /* fold into 32-bits */
" addc 0, %0, %0\n" /* add carry */
" add,dc %4, %0, %0\n" /* fold into 32-bits, plus carry */
" addc 0, %0, %0\n" /* add final carry */
#else
@ -163,7 +164,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
" ldw,ma 4(%2), %7\n" /* 4th daddr */
" addc %6, %0, %0\n"
" addc %7, %0, %0\n"
" addc %3, %0, %0\n" /* fold in proto+len, catch carry */
" addc %3, %0, %0\n" /* fold in proto+len */
" addc 0, %0, %0\n" /* add carry */
#endif
: "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len),

View File

@ -12,9 +12,16 @@
#ifndef __ASSEMBLY__
/* Performance Monitor Registers */
#define mfpmr(rn) ({unsigned int rval; \
asm volatile("mfpmr %0," __stringify(rn) \
asm volatile(".machine push; " \
".machine e300; " \
"mfpmr %0," __stringify(rn) ";" \
".machine pop; " \
: "=r" (rval)); rval;})
#define mtpmr(rn, v) asm volatile("mtpmr " __stringify(rn) ",%0" : : "r" (v))
#define mtpmr(rn, v) asm volatile(".machine push; " \
".machine e300; " \
"mtpmr " __stringify(rn) ",%0; " \
".machine pop; " \
: : "r" (v))
#endif /* __ASSEMBLY__ */
/* Freescale Book E Performance Monitor APU Registers */

View File

@ -67,6 +67,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o
CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec)
obj-$(CONFIG_PPC64) += $(obj64-y)

View File

@ -445,6 +445,14 @@ config EFI
allow the kernel to be booted as an EFI application. This
is only useful on systems that have UEFI firmware.
config CC_HAVE_STACKPROTECTOR_TLS
def_bool $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=tp -mstack-protector-guard-offset=0)
config STACKPROTECTOR_PER_TASK
def_bool y
depends on !GCC_PLUGIN_RANDSTRUCT
depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS
endmenu
config BUILTIN_DTB

View File

@ -88,6 +88,16 @@ KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
# architectures. It's faster to have GCC emit only aligned accesses.
KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
prepare: stack_protector_prepare
stack_protector_prepare: prepare0
$(eval KBUILD_CFLAGS += -mstack-protector-guard=tls \
-mstack-protector-guard-reg=tp \
-mstack-protector-guard-offset=$(shell \
awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}' \
include/generated/asm-offsets.h))
endif
# arch specific predefines for sparse
CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS)

View File

@ -456,8 +456,8 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define PAGE_SHARED __pgprot(0)
#define PAGE_KERNEL __pgprot(0)
#define swapper_pg_dir NULL
#define TASK_SIZE 0xffffffffUL
#define VMALLOC_START 0
#define TASK_SIZE _AC(-1, UL)
#define VMALLOC_START _AC(0, UL)
#define VMALLOC_END TASK_SIZE
static inline void __kernel_map_pages(struct page *page, int numpages, int enable) {}

View File

@ -24,6 +24,7 @@ static __always_inline void boot_init_stack_canary(void)
canary &= CANARY_MASK;
current->stack_canary = canary;
__stack_chk_guard = current->stack_canary;
if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
__stack_chk_guard = current->stack_canary;
}
#endif /* _ASM_RISCV_STACKPROTECTOR_H */

View File

@ -468,7 +468,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
long __kr_err; \
long __kr_err = 0; \
\
__get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
if (unlikely(__kr_err)) \
@ -477,7 +477,7 @@ do { \
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
long __kr_err; \
long __kr_err = 0; \
\
__put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
if (unlikely(__kr_err)) \

View File

@ -66,6 +66,9 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_F30, task_struct, thread.fstate.f[30]);
OFFSET(TASK_THREAD_F31, task_struct, thread.fstate.f[31]);
OFFSET(TASK_THREAD_FCSR, task_struct, thread.fstate.fcsr);
#ifdef CONFIG_STACKPROTECTOR
OFFSET(TSK_STACK_CANARY, task_struct, stack_canary);
#endif
DEFINE(PT_SIZE, sizeof(struct pt_regs));
OFFSET(PT_EPC, pt_regs, epc);

View File

@ -22,9 +22,7 @@
#include <asm/switch_to.h>
#include <asm/thread_info.h>
register unsigned long gp_in_global __asm__("gp");
#ifdef CONFIG_STACKPROTECTOR
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
@ -117,7 +115,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gp = gp_in_global;
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;

View File

@ -9,6 +9,7 @@
#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
#define CFI_RESTORE .cfi_restore
#define CFI_REL_OFFSET .cfi_rel_offset
#ifdef CONFIG_AS_CFI_VAL_OFFSET
#define CFI_VAL_OFFSET .cfi_val_offset

View File

@ -1298,6 +1298,7 @@ ENDPROC(stack_overflow)
#endif
.section .rodata, "a"
.balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
.globl sys_call_table
sys_call_table:

View File

@ -23,8 +23,10 @@ __kernel_\func:
CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
stg %r14,STACK_FRAME_OVERHEAD(%r15)
CFI_REL_OFFSET 14, STACK_FRAME_OVERHEAD
brasl %r14,__s390_vdso_\func
lg %r14,STACK_FRAME_OVERHEAD(%r15)
CFI_RESTORE 14
aghi %r15,WRAPPER_FRAME_SIZE
CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15

View File

@ -2627,7 +2627,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
return 0;
start = pmd_val(*pmd) & HPAGE_MASK;
end = start + HPAGE_SIZE - 1;
end = start + HPAGE_SIZE;
__storage_key_init_range(start, end);
set_bit(PG_arch_1, &page->flags);
cond_resched();

View File

@ -146,7 +146,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
}
if (!test_and_set_bit(PG_arch_1, &page->flags))
__storage_key_init_range(paddr, paddr + size - 1);
__storage_key_init_range(paddr, paddr + size);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,

View File

@ -274,7 +274,7 @@ static int __init setup_nmi_watchdog(char *str)
if (!strncmp(str, "panic", 5))
panic_on_timeout = 1;
return 0;
return 1;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);

View File

@ -449,9 +449,8 @@ static __init int vdso_setup(char *s)
unsigned long val;
err = kstrtoul(s, 10, &val);
if (err)
return err;
vdso_enabled = val;
return 0;
if (!err)
vdso_enabled = val;
return 1;
}
__setup("vdso=", vdso_setup);

View File

@ -58,6 +58,7 @@ config X86
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_32BIT_OFF_T if X86_32
select ARCH_CONFIGURES_CPU_MITIGATIONS
select ARCH_CLOCKSOURCE_INIT
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CPU_FINALIZE_INIT
@ -359,10 +360,6 @@ config X86_64_SMP
def_bool y
depends on X86_64 && SMP
config X86_32_LAZY_GS
def_bool y
depends on X86_32 && !STACKPROTECTOR
config ARCH_SUPPORTS_UPROBES
def_bool y
@ -385,7 +382,8 @@ config CC_HAS_SANE_STACKPROTECTOR
default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC))
help
We have to make sure stack protector is unconditionally disabled if
the compiler produces broken code.
the compiler produces broken code or if it does not let us control
the segment on 32-bit kernels.
menu "Processor type and features"
@ -2418,17 +2416,17 @@ config CC_HAS_SLS
config CC_HAS_RETURN_THUNK
def_bool $(cc-option,-mfunction-return=thunk-extern)
menuconfig SPECULATION_MITIGATIONS
bool "Mitigations for speculative execution vulnerabilities"
menuconfig CPU_MITIGATIONS
bool "Mitigations for CPU vulnerabilities"
default y
help
Say Y here to enable options which enable mitigations for
speculative execution hardware vulnerabilities.
Say Y here to enable options which enable mitigations for hardware
vulnerabilities (usually related to speculative execution).
If you say N, all mitigations will be disabled. You really
should know what you are doing to say so.
if SPECULATION_MITIGATIONS
if CPU_MITIGATIONS
config PAGE_TABLE_ISOLATION
bool "Remove the kernel mapping in user mode"
@ -2518,6 +2516,17 @@ config GDS_FORCE_MITIGATION
If in doubt, say N.
config MITIGATION_RFDS
bool "RFDS Mitigation"
depends on CPU_SUP_INTEL
default y
help
Enable mitigation for Register File Data Sampling (RFDS) by default.
RFDS is a hardware vulnerability which affects Intel Atom CPUs. It
allows unprivileged speculative access to stale data previously
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
endif
config ARCH_HAS_ADD_PAGES

View File

@ -87,6 +87,14 @@ ifeq ($(CONFIG_X86_32),y)
# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding
ifeq ($(CONFIG_STACKPROTECTOR),y)
ifeq ($(CONFIG_SMP),y)
KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
else
KBUILD_CFLAGS += -mstack-protector-guard=global
endif
endif
else
BITS := 64
UTS_MACHINE := x86_64

View File

@ -6,6 +6,9 @@
#include <linux/linkage.h>
#include <asm/export.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/segment.h>
#include <asm/cache.h>
.pushsection .noinstr.text, "ax"
@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
EXPORT_SYMBOL_GPL(entry_ibpb);
.popsection
/*
* Define the VERW operand that is disguised as entry code so that
* it can be referenced with KPTI enabled. This ensure VERW can be
* used late in exit-to-user path after page tables are switched.
*/
.pushsection .entry.text, "ax"
.align L1_CACHE_BYTES, 0xcc
SYM_CODE_START_NOALIGN(mds_verw_sel)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
.word __KERNEL_DS
.align L1_CACHE_BYTES, 0xcc
SYM_CODE_END(mds_verw_sel);
/* For KVM */
EXPORT_SYMBOL_GPL(mds_verw_sel);
.popsection

View File

@ -20,7 +20,7 @@
* 1C(%esp) - %ds
* 20(%esp) - %es
* 24(%esp) - %fs
* 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
* 28(%esp) - unused -- was %gs on old stackprotector kernels
* 2C(%esp) - orig_eax
* 30(%esp) - %eip
* 34(%esp) - %cs
@ -56,14 +56,9 @@
/*
* User gs save/restore
*
* %gs is used for userland TLS and kernel only uses it for stack
* canary which is required to be at %gs:20 by gcc. Read the comment
* at the top of stackprotector.h for more info.
*
* Local labels 98 and 99 are used.
* This is leftover junk from CONFIG_X86_32_LAZY_GS. A subsequent patch
* will remove it entirely.
*/
#ifdef CONFIG_X86_32_LAZY_GS
/* unfortunately push/pop can't be no-op */
.macro PUSH_GS
pushl $0
@ -86,49 +81,6 @@
.macro SET_KERNEL_GS reg
.endm
#else /* CONFIG_X86_32_LAZY_GS */
.macro PUSH_GS
pushl %gs
.endm
.macro POP_GS pop=0
98: popl %gs
.if \pop <> 0
add $\pop, %esp
.endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99: movl $0, (%esp)
jmp 98b
.popsection
_ASM_EXTABLE(98b, 99b)
.endm
.macro PTGS_TO_GS
98: mov PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99: movl $0, PT_GS(%esp)
jmp 98b
.popsection
_ASM_EXTABLE(98b, 99b)
.endm
.macro GS_TO_REG reg
movl %gs, \reg
.endm
.macro REG_TO_PTGS reg
movl \reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
movl $(__KERNEL_STACK_CANARY), \reg
movl \reg, %gs
.endm
#endif /* CONFIG_X86_32_LAZY_GS */
/* Unconditionally switch to user cr3 */
.macro SWITCH_TO_USER_CR3 scratch_reg:req
@ -779,7 +731,7 @@ SYM_CODE_START(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movl TASK_stack_canary(%edx), %ebx
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
movl %ebx, PER_CPU_VAR(__stack_chk_guard)
#endif
/*
@ -997,6 +949,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
BUG_IF_WRONG_CR3 no_user_check=1
popfl
popl %eax
CLEAR_CPU_BUFFERS
/*
* Return back to the vDSO, which will pop ecx and edx.
@ -1069,6 +1022,7 @@ restore_all_switch_stack:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
CLEAR_CPU_BUFFERS
.Lirq_return:
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@ -1267,6 +1221,7 @@ SYM_CODE_START(asm_exc_nmi)
/* Not on SYSENTER stack. */
call exc_nmi
CLEAR_CPU_BUFFERS
jmp .Lnmi_return
.Lnmi_from_sysenter_stack:

View File

@ -46,14 +46,6 @@
.code64
.section .entry.text, "ax"
#ifdef CONFIG_PARAVIRT_XXL
SYM_CODE_START(native_usergs_sysret64)
UNWIND_HINT_EMPTY
swapgs
sysretq
SYM_CODE_END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT_XXL */
/*
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
@ -128,7 +120,12 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* Try to use SYSRET instead of IRET if we're returning to
* a completely clean 64-bit userspace context. If we're not,
* go to the slow exit path.
* In the Xen PV case we must use iret anyway.
*/
ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \
X86_FEATURE_XENPV
movq RCX(%rsp), %rcx
movq RIP(%rsp), %r11
@ -220,7 +217,9 @@ syscall_return_via_sysret:
popq %rdi
popq %rsp
USERGS_SYSRET64
swapgs
CLEAR_CPU_BUFFERS
sysretq
SYM_CODE_END(entry_SYSCALL_64)
/*
@ -615,6 +614,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
/* Restore RDI. */
popq %rdi
SWAPGS
CLEAR_CPU_BUFFERS
INTERRUPT_RETURN
@ -721,6 +721,8 @@ native_irq_return_ldt:
*/
popq %rax /* Restore user RAX */
CLEAR_CPU_BUFFERS
/*
* RSP now points to an ordinary IRET frame, except that the page
* is read-only and RSP[31:16] are preloaded with the userspace
@ -1487,6 +1489,12 @@ nmi_restore:
std
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
* Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
* NMI in kernel after user state is restored. For an unprivileged user
* these conditions are hard to meet.
*/
/*
* iretq reads the "iret" frame and exits the NMI stack in a
* single instruction. We are returning to kernel mode, so this
@ -1504,6 +1512,7 @@ SYM_CODE_END(asm_exc_nmi)
SYM_CODE_START(ignore_sysret)
UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
CLEAR_CPU_BUFFERS
sysretl
SYM_CODE_END(ignore_sysret)
#endif

View File

@ -319,6 +319,7 @@ sysret32_from_system_call:
xorl %r9d, %r9d
xorl %r10d, %r10d
swapgs
CLEAR_CPU_BUFFERS
sysretl
SYM_CODE_END(entry_SYSCALL_compat)

View File

@ -12,6 +12,7 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
#include <asm/io.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@ -111,7 +112,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
static inline u32 native_apic_mem_read(u32 reg)
{
return *((volatile u32 *)(APIC_BASE + reg));
return readl((void __iomem *)(APIC_BASE + reg));
}
extern void native_apic_wait_icr_idle(void);

View File

@ -12,6 +12,7 @@
#include <asm/special_insns.h>
#include <asm/preempt.h>
#include <asm/asm.h>
#include <asm/nospec-branch.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);

View File

@ -6,12 +6,14 @@
# define __ASM_FORM(x) x
# define __ASM_FORM_RAW(x) x
# define __ASM_FORM_COMMA(x) x,
# define __ASM_REGPFX %
#else
#include <linux/stringify.h>
# define __ASM_FORM(x) " " __stringify(x) " "
# define __ASM_FORM_RAW(x) __stringify(x)
# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
# define __ASM_REGPFX %%
#endif
#ifndef __x86_64__
@ -48,6 +50,9 @@
#define _ASM_SI __ASM_REG(si)
#define _ASM_DI __ASM_REG(di)
/* Adds a (%rip) suffix on 64 bits only; for immediate memory references */
#define _ASM_RIP(x) __ASM_SEL_RAW(x, x (__ASM_REGPFX rip))
#ifndef __x86_64__
/* 32 bit */

View File

@ -33,6 +33,8 @@ enum cpuid_leafs
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
CPUID_LNX_5,
NR_CPUID_WORDS,
};
#ifdef CONFIG_X86_FEATURE_NAMES
@ -93,8 +95,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
REQUIRED_MASK_CHECK || \
BUILD_BUG_ON_ZERO(NCAPINTS != 21))
BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@ -118,8 +121,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
DISABLED_MASK_CHECK || \
BUILD_BUG_ON_ZERO(NCAPINTS != 21))
BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \

View File

@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
#define NCAPINTS 21 /* N 32-bit words worth of info */
#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */
/*
@ -300,6 +300,7 @@
#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
#define X86_FEATURE_MSR_TSX_CTRL (11*32+18) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
#define X86_FEATURE_CLEAR_CPU_BUF (11*32+19) /* "" Clear CPU buffers using VERW */
#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
@ -403,6 +404,7 @@
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
@ -452,4 +454,5 @@
/* BUG word 2 */
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
#endif /* _ASM_X86_CPUFEATURES_H */

View File

@ -103,6 +103,7 @@
#define DISABLED_MASK18 0
#define DISABLED_MASK19 0
#define DISABLED_MASK20 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
#define DISABLED_MASK21 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_DISABLED_FEATURES_H */

View File

@ -77,7 +77,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
static __always_inline void arch_exit_to_user_mode(void)
{
mds_user_clear_cpu_buffers();
amd_clear_divider();
}
#define arch_exit_to_user_mode arch_exit_to_user_mode

View File

@ -132,12 +132,6 @@ static __always_inline unsigned long arch_local_irq_save(void)
#endif
#define INTERRUPT_RETURN jmp native_iret
#define USERGS_SYSRET64 \
swapgs; \
sysretq;
#define USERGS_SYSRET32 \
swapgs; \
sysretl
#else
#define INTERRUPT_RETURN iret

View File

@ -30,6 +30,7 @@
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
@ -38,6 +39,7 @@
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
/* Intel MSRs. Some also available on other CPUs */
@ -166,6 +168,14 @@
* CPU is not vulnerable to Gather
* Data Sampling (GDS).
*/
#define ARCH_CAP_RFDS_NO BIT(27) /*
* Not susceptible to Register
* File Data Sampling.
*/
#define ARCH_CAP_RFDS_CLEAR BIT(28) /*
* VERW clears CPU Register
* File.
*/
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*

View File

@ -155,11 +155,20 @@
.Lskip_rsb_\@:
.endm
/*
* The CALL to srso_alias_untrain_ret() must be patched in directly at
* the spot where untraining must be done, ie., srso_alias_untrain_ret()
* must be the target of a CALL instruction instead of indirectly
* jumping to a wrapper which then calls it. Therefore, this macro is
* called outside of __UNTRAIN_RET below, for the time being, before the
* kernel can support nested alternatives with arbitrary nesting.
*/
.macro CALL_UNTRAIN_RET
#ifdef CONFIG_CPU_UNRET_ENTRY
#define CALL_UNTRAIN_RET "call entry_untrain_ret"
#else
#define CALL_UNTRAIN_RET ""
ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
"call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
.endm
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
@ -176,12 +185,24 @@
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
defined(CONFIG_CPU_SRSO)
ANNOTATE_UNRET_END
ALTERNATIVE_2 "", \
CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
"call entry_ibpb", X86_FEATURE_ENTRY_IBPB
CALL_UNTRAIN_RET
ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
#endif
.endm
/*
* Macro to execute VERW instruction that mitigate transient data sampling
* attacks such as MDS. On affected systems a microcode update overloaded VERW
* instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
*
* Note: Only the memory operand variant of VERW clears the CPU buffers.
*/
.macro CLEAR_CPU_BUFFERS
ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF
verw _ASM_RIP(mds_verw_sel)
.Lskip_verw_\@:
.endm
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@ -357,11 +378,12 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
extern u16 mds_verw_sel;
#include <asm/segment.h>
/**
@ -387,17 +409,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}
/**
* mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
*/
static __always_inline void mds_user_clear_cpu_buffers(void)
{
if (static_branch_likely(&mds_user_clear))
mds_clear_cpu_buffers();
}
/**
* mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
*

View File

@ -776,11 +776,6 @@ extern void default_banner(void);
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT_XXL
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \
ANNOTATE_RETPOLINE_SAFE; \
jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);)
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(clobbers) \
PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \

View File

@ -157,14 +157,6 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
/*
* Switch to usermode gs and return to 64-bit usermode using
* sysret. Only used in 64-bit kernels to return to 64-bit
* processes. Usermode register state, including %rsp, must
* already be restored.
*/
void (*usergs_sysret64)(void);
/* Normal iret. Jump to this with the standard iret stack
frame set up. */
void (*iret)(void);

View File

@ -441,6 +441,9 @@ struct fixed_percpu_data {
* GCC hardcodes the stack canary as %gs:40. Since the
* irq_stack is the object at %gs:0, we reserve the bottom
* 48 bytes of the irq stack for the canary.
*
* Once we are willing to require -mstack-protector-guard-symbol=
* support for x86_64 stackprotector, we can get rid of this.
*/
char gs_base[40];
unsigned long stack_canary;
@ -461,17 +464,7 @@ extern asmlinkage void ignore_sysret(void);
void current_save_fsgs(void);
#else /* X86_64 */
#ifdef CONFIG_STACKPROTECTOR
/*
* Make sure stack canary segment base is cached-aligned:
* "For Intel Atom processors, avoid non zero segment base address
* that is not aligned to cache line boundary at all cost."
* (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
*/
struct stack_canary {
char __pad[20]; /* canary at %gs:20 */
unsigned long canary;
};
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
#endif
/* Per CPU softirq stack pointer */
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);

View File

@ -37,7 +37,10 @@ struct pt_regs {
unsigned short __esh;
unsigned short fs;
unsigned short __fsh;
/* On interrupt, gs and __gsh store the vector number. */
/*
* On interrupt, gs and __gsh store the vector number. They never
* store gs any more.
*/
unsigned short gs;
unsigned short __gsh;
/* On interrupt, this is the error code. */

View File

@ -103,6 +103,7 @@
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
#define REQUIRED_MASK21 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */

View File

@ -95,7 +95,7 @@
*
* 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ]
* 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
* 28 - unused
* 29 - unused
* 30 - unused
* 31 - TSS for double fault handler
@ -118,7 +118,6 @@
#define GDT_ENTRY_ESPFIX_SS 26
#define GDT_ENTRY_PERCPU 27
#define GDT_ENTRY_STACK_CANARY 28
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
@ -158,12 +157,6 @@
# define __KERNEL_PERCPU 0
#endif
#ifdef CONFIG_STACKPROTECTOR
# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
#else
# define __KERNEL_STACK_CANARY 0
#endif
#else /* 64-bit: */
#include <asm/cache.h>
@ -364,22 +357,15 @@ static inline void __loadsegment_fs(unsigned short value)
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
/*
* x86-32 user GS accessors:
* x86-32 user GS accessors. This is ugly and could do with some cleaning up.
*/
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_32_LAZY_GS
# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
# define task_user_gs(tsk) ((tsk)->thread.gs)
# define lazy_save_gs(v) savesegment(gs, (v))
# define lazy_load_gs(v) loadsegment(gs, (v))
# else /* X86_32_LAZY_GS */
# define get_user_gs(regs) (u16)((regs)->gs)
# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
# define lazy_save_gs(v) do { } while (0)
# define lazy_load_gs(v) do { } while (0)
# endif /* X86_32_LAZY_GS */
# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
# define task_user_gs(tsk) ((tsk)->thread.gs)
# define lazy_save_gs(v) savesegment(gs, (v))
# define lazy_load_gs(v) loadsegment(gs, (v))
# define load_gs_index(v) loadsegment(gs, (v))
#endif /* X86_32 */
#endif /* !__ASSEMBLY__ */

View File

@ -49,7 +49,6 @@ extern unsigned long saved_video_mode;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
extern unsigned long __startup_secondary_64(void);
extern void startup_64_setup_env(unsigned long physbase);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);

View File

@ -5,30 +5,23 @@
* Stack protector works by putting predefined pattern at the start of
* the stack frame and verifying that it hasn't been overwritten when
* returning from the function. The pattern is called stack canary
* and unfortunately gcc requires it to be at a fixed offset from %gs.
* On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
* and x86_32 use segment registers differently and thus handles this
* requirement differently.
* and unfortunately gcc historically required it to be at a fixed offset
* from the percpu segment base. On x86_64, the offset is 40 bytes.
*
* On x86_64, %gs is shared by percpu area and stack canary. All
* percpu symbols are zero based and %gs points to the base of percpu
* area. The first occupant of the percpu area is always
* fixed_percpu_data which contains stack_canary at offset 40. Userland
* %gs is always saved and restored on kernel entry and exit using
* swapgs, so stack protector doesn't add any complexity there.
* The same segment is shared by percpu area and stack canary. On
* x86_64, percpu symbols are zero based and %gs (64-bit) points to the
* base of percpu area. The first occupant of the percpu area is always
* fixed_percpu_data which contains stack_canary at the approproate
* offset. On x86_32, the stack canary is just a regular percpu
* variable.
*
* On x86_32, it's slightly more complicated. As in x86_64, %gs is
* used for userland TLS. Unfortunately, some processors are much
* slower at loading segment registers with different value when
* entering and leaving the kernel, so the kernel uses %fs for percpu
* area and manages %gs lazily so that %gs is switched only when
* necessary, usually during task switch.
* Putting percpu data in %fs on 32-bit is a minor optimization compared to
* using %gs. Since 32-bit userspace normally has %fs == 0, we are likely
* to load 0 into %fs on exit to usermode, whereas with percpu data in
* %gs, we are likely to load a non-null %gs on return to user mode.
*
* As gcc requires the stack canary at %gs:20, %gs can't be managed
* lazily if stack protector is enabled, so the kernel saves and
* restores userland %gs on kernel entry and exit. This behavior is
* controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
* system.h to hide the details.
* Once we are willing to require GCC 8.1 or better for 64-bit stackprotector
* support, we can remove some of this complexity.
*/
#ifndef _ASM_STACKPROTECTOR_H
@ -44,14 +37,6 @@
#include <linux/random.h>
#include <linux/sched.h>
/*
* 24 byte read-only segment initializer for stack canary. Linker
* can't handle the address bit shifting. Address will be set in
* head_32 for boot CPU and setup_per_cpu_areas() for others.
*/
#define GDT_STACK_CANARY_INIT \
[GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
/*
* Initialize the stackprotector canary value.
*
@ -86,7 +71,7 @@ static __always_inline void boot_init_stack_canary(void)
#ifdef CONFIG_X86_64
this_cpu_write(fixed_percpu_data.stack_canary, canary);
#else
this_cpu_write(stack_canary.canary, canary);
this_cpu_write(__stack_chk_guard, canary);
#endif
}
@ -95,48 +80,16 @@ static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_64
per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
#else
per_cpu(stack_canary.canary, cpu) = idle->stack_canary;
#endif
}
static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
struct desc_struct desc;
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
set_desc_base(&desc, canary);
write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
#endif
}
static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
#endif
}
#else /* STACKPROTECTOR */
#define GDT_STACK_CANARY_INIT
/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
static inline void setup_stack_canary_segment(int cpu)
{ }
static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{ }
static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
asm volatile ("mov %0, %%gs" : : "r" (0));
#endif
}
#endif /* STACKPROTECTOR */
#endif /* _ASM_STACKPROTECTOR_H */

View File

@ -12,13 +12,6 @@
/* image of the saved processor state */
struct saved_context {
/*
* On x86_32, all segment registers, with the possible exception of
* gs, are saved at kernel entry in pt_regs.
*/
#ifdef CONFIG_X86_32_LAZY_GS
u16 gs;
#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
struct saved_msrs saved_msrs;
@ -29,6 +22,11 @@ struct saved_context {
unsigned long tr;
unsigned long safety;
unsigned long return_address;
/*
* On x86_32, all segment registers except gs are saved at kernel
* entry in pt_regs.
*/
u16 gs;
bool misc_enable_saved;
} __attribute__((packed));

View File

@ -49,7 +49,6 @@ endif
# non-deterministic coverage.
KCOV_INSTRUMENT := n
CFLAGS_head$(BITS).o += -fno-stack-protector
CFLAGS_cc_platform.o += -fno-stack-protector
CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace

View File

@ -53,11 +53,6 @@ void foo(void)
offsetof(struct cpu_entry_area, tss.x86_tss.sp1) -
offsetofend(struct cpu_entry_area, entry_stack_page.stack));
#ifdef CONFIG_STACKPROTECTOR
BLANK();
OFFSET(stack_canary_offset, stack_canary, canary);
#endif
BLANK();
DEFINE(EFI_svam, offsetof(efi_runtime_services_t, set_virtual_address_map));
}

View File

@ -13,8 +13,6 @@ int main(void)
{
#ifdef CONFIG_PARAVIRT
#ifdef CONFIG_PARAVIRT_XXL
OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template,
cpu.usergs_sysret64);
#ifdef CONFIG_DEBUG_ENTRY
OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl);
#endif

View File

@ -1049,11 +1049,11 @@ static bool cpu_has_zenbleed_microcode(void)
u32 good_rev = 0;
switch (boot_cpu_data.x86_model) {
case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
case 0x60 ... 0x67: good_rev = 0x0860010b; break;
case 0x68 ... 0x6f: good_rev = 0x08608105; break;
case 0x70 ... 0x7f: good_rev = 0x08701032; break;
case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
case 0x30 ... 0x3f: good_rev = 0x0830107b; break;
case 0x60 ... 0x67: good_rev = 0x0860010c; break;
case 0x68 ... 0x6f: good_rev = 0x08608107; break;
case 0x70 ... 0x7f: good_rev = 0x08701033; break;
case 0xa0 ... 0xaf: good_rev = 0x08a00009; break;
default:
return false;

View File

@ -109,9 +109,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
/* Control MDS CPU buffer clear before returning to user space */
DEFINE_STATIC_KEY_FALSE(mds_user_clear);
EXPORT_SYMBOL_GPL(mds_user_clear);
/* Control MDS CPU buffer clear before idling (halt, mwait) */
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
@ -249,7 +246,7 @@ static void __init mds_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
mds_mitigation = MDS_MITIGATION_VMWERV;
static_branch_enable(&mds_user_clear);
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
(mds_nosmt || cpu_mitigations_auto_nosmt()))
@ -353,7 +350,7 @@ static void __init taa_select_mitigation(void)
* For guests that can't determine whether the correct microcode is
* present on host, enable the mitigation for UCODE_NEEDED as well.
*/
static_branch_enable(&mds_user_clear);
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
@ -421,7 +418,14 @@ static void __init mmio_select_mitigation(void)
*/
if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
boot_cpu_has(X86_FEATURE_RTM)))
static_branch_enable(&mds_user_clear);
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
/*
* X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based
* mitigations, disable KVM-only mitigation in that case.
*/
if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
static_branch_disable(&mmio_stale_data_clear);
else
static_branch_enable(&mmio_stale_data_clear);
@ -473,6 +477,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str)
}
early_param("mmio_stale_data", mmio_stale_data_parse_cmdline);
#undef pr_fmt
#define pr_fmt(fmt) "Register File Data Sampling: " fmt
enum rfds_mitigations {
RFDS_MITIGATION_OFF,
RFDS_MITIGATION_VERW,
RFDS_MITIGATION_UCODE_NEEDED,
};
/* Default mitigation for Register File Data Sampling */
static enum rfds_mitigations rfds_mitigation __ro_after_init =
IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF;
static const char * const rfds_strings[] = {
[RFDS_MITIGATION_OFF] = "Vulnerable",
[RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File",
[RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
};
static void __init rfds_select_mitigation(void)
{
if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) {
rfds_mitigation = RFDS_MITIGATION_OFF;
return;
}
if (rfds_mitigation == RFDS_MITIGATION_OFF)
return;
if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
}
static __init int rfds_parse_cmdline(char *str)
{
if (!str)
return -EINVAL;
if (!boot_cpu_has_bug(X86_BUG_RFDS))
return 0;
if (!strcmp(str, "off"))
rfds_mitigation = RFDS_MITIGATION_OFF;
else if (!strcmp(str, "on"))
rfds_mitigation = RFDS_MITIGATION_VERW;
return 0;
}
early_param("reg_file_data_sampling", rfds_parse_cmdline);
#undef pr_fmt
#define pr_fmt(fmt) "" fmt
@ -481,12 +536,12 @@ static void __init md_clear_update_mitigation(void)
if (cpu_mitigations_off())
return;
if (!static_key_enabled(&mds_user_clear))
if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
goto out;
/*
* mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
* mitigation, if necessary.
* X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
* Stale Data mitigation, if necessary.
*/
if (mds_mitigation == MDS_MITIGATION_OFF &&
boot_cpu_has_bug(X86_BUG_MDS)) {
@ -498,11 +553,19 @@ static void __init md_clear_update_mitigation(void)
taa_mitigation = TAA_MITIGATION_VERW;
taa_select_mitigation();
}
if (mmio_mitigation == MMIO_MITIGATION_OFF &&
boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
/*
* MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear
* gets updated correctly as per X86_FEATURE_CLEAR_CPU_BUF state.
*/
if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
mmio_mitigation = MMIO_MITIGATION_VERW;
mmio_select_mitigation();
}
if (rfds_mitigation == RFDS_MITIGATION_OFF &&
boot_cpu_has_bug(X86_BUG_RFDS)) {
rfds_mitigation = RFDS_MITIGATION_VERW;
rfds_select_mitigation();
}
out:
if (boot_cpu_has_bug(X86_BUG_MDS))
pr_info("MDS: %s\n", mds_strings[mds_mitigation]);
@ -512,6 +575,8 @@ static void __init md_clear_update_mitigation(void)
pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
pr_info("MMIO Stale Data: Unknown: No mitigations\n");
if (boot_cpu_has_bug(X86_BUG_RFDS))
pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]);
}
static void __init md_clear_select_mitigation(void)
@ -519,11 +584,12 @@ static void __init md_clear_select_mitigation(void)
mds_select_mitigation();
taa_select_mitigation();
mmio_select_mitigation();
rfds_select_mitigation();
/*
* As MDS, TAA and MMIO Stale Data mitigations are inter-related, update
* and print their mitigation after MDS, TAA and MMIO Stale Data
* mitigation selection is done.
* As these mitigations are inter-related and rely on VERW instruction
* to clear the microarchitural buffers, update and print their status
* after mitigation selection is done for each of these vulnerabilities.
*/
md_clear_update_mitigation();
}
@ -1251,19 +1317,21 @@ spectre_v2_user_select_mitigation(void)
}
/*
* If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP
* If no STIBP, Intel enhanced IBRS is enabled, or SMT impossible, STIBP
* is not required.
*
* Enhanced IBRS also protects against cross-thread branch target
* Intel's Enhanced IBRS also protects against cross-thread branch target
* injection in user-mode as the IBRS bit remains always set which
* implicitly enables cross-thread protections. However, in legacy IBRS
* mode, the IBRS bit is set only on kernel entry and cleared on return
* to userspace. This disables the implicit cross-thread protection,
* so allow for STIBP to be selected in that case.
* to userspace. AMD Automatic IBRS also does not protect userspace.
* These modes therefore disable the implicit cross-thread protection,
* so allow for STIBP to be selected in those cases.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
spectre_v2_in_eibrs_mode(spectre_v2_enabled))
(spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
!boot_cpu_has(X86_FEATURE_AUTOIBRS)))
return;
/*
@ -1293,9 +1361,9 @@ static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
[SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
[SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
[SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
[SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
[SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
[SPECTRE_V2_EIBRS] = "Mitigation: Enhanced / Automatic IBRS",
[SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced / Automatic IBRS + LFENCE",
[SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced / Automatic IBRS + Retpolines",
[SPECTRE_V2_IBRS] = "Mitigation: IBRS",
};
@ -1364,7 +1432,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
!boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n",
pr_err("%s selected but CPU doesn't have Enhanced or Automatic IBRS. Switching to AUTO select\n",
mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
@ -1549,8 +1617,12 @@ static void __init spectre_v2_select_mitigation(void)
pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
update_spec_ctrl(x86_spec_ctrl_base);
if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) {
msr_set_bit(MSR_EFER, _EFER_AUTOIBRS);
} else {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
update_spec_ctrl(x86_spec_ctrl_base);
}
}
switch (mode) {
@ -1634,8 +1706,8 @@ static void __init spectre_v2_select_mitigation(void)
/*
* Retpoline protects the kernel, but doesn't protect firmware. IBRS
* and Enhanced IBRS protect firmware too, so enable IBRS around
* firmware calls only when IBRS / Enhanced IBRS aren't otherwise
* enabled.
* firmware calls only when IBRS / Enhanced / Automatic IBRS aren't
* otherwise enabled.
*
* Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
* the user might select retpoline on the kernel command line and if
@ -2432,74 +2504,74 @@ static const char * const l1tf_vmx_states[] = {
static ssize_t l1tf_show_state(char *buf)
{
if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO)
return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG);
if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
(l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
sched_smt_active())) {
return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation]);
return sysfs_emit(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation]);
}
return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
return sysfs_emit(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
l1tf_vmx_states[l1tf_vmx_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
static ssize_t itlb_multihit_show_state(char *buf)
{
if (!boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
!boot_cpu_has(X86_FEATURE_VMX))
return sprintf(buf, "KVM: Mitigation: VMX unsupported\n");
return sysfs_emit(buf, "KVM: Mitigation: VMX unsupported\n");
else if (!(cr4_read_shadow() & X86_CR4_VMXE))
return sprintf(buf, "KVM: Mitigation: VMX disabled\n");
return sysfs_emit(buf, "KVM: Mitigation: VMX disabled\n");
else if (itlb_multihit_kvm_mitigation)
return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
return sysfs_emit(buf, "KVM: Mitigation: Split huge pages\n");
else
return sprintf(buf, "KVM: Vulnerable\n");
return sysfs_emit(buf, "KVM: Vulnerable\n");
}
#else
static ssize_t l1tf_show_state(char *buf)
{
return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
return sysfs_emit(buf, "%s\n", L1TF_DEFAULT_MSG);
}
static ssize_t itlb_multihit_show_state(char *buf)
{
return sprintf(buf, "Processor vulnerable\n");
return sysfs_emit(buf, "Processor vulnerable\n");
}
#endif
static ssize_t mds_show_state(char *buf)
{
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
return sprintf(buf, "%s; SMT Host state unknown\n",
mds_strings[mds_mitigation]);
return sysfs_emit(buf, "%s; SMT Host state unknown\n",
mds_strings[mds_mitigation]);
}
if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) {
return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
(mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" :
sched_smt_active() ? "mitigated" : "disabled"));
return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
(mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" :
sched_smt_active() ? "mitigated" : "disabled"));
}
return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
return sysfs_emit(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
static ssize_t tsx_async_abort_show_state(char *buf)
{
if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
(taa_mitigation == TAA_MITIGATION_OFF))
return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);
return sysfs_emit(buf, "%s\n", taa_strings[taa_mitigation]);
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
return sprintf(buf, "%s; SMT Host state unknown\n",
taa_strings[taa_mitigation]);
return sysfs_emit(buf, "%s; SMT Host state unknown\n",
taa_strings[taa_mitigation]);
}
return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
return sysfs_emit(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
static ssize_t mmio_stale_data_show_state(char *buf)
@ -2519,9 +2591,15 @@ static ssize_t mmio_stale_data_show_state(char *buf)
sched_smt_active() ? "vulnerable" : "disabled");
}
static ssize_t rfds_show_state(char *buf)
{
return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]);
}
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
!boot_cpu_has(X86_FEATURE_AUTOIBRS))
return "";
switch (spectre_v2_user_stibp) {
@ -2567,47 +2645,46 @@ static char *pbrsb_eibrs_state(void)
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
return sprintf(buf, "Vulnerable: LFENCE\n");
return sysfs_emit(buf, "Vulnerable: LFENCE\n");
if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
return sysfs_emit(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
if (sched_smt_active() && unprivileged_ebpf_enabled() &&
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
return sprintf(buf, "%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
stibp_state(),
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
pbrsb_eibrs_state(),
spectre_v2_module_string());
return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
stibp_state(),
boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
pbrsb_eibrs_state(),
spectre_v2_module_string());
}
static ssize_t srbds_show_state(char *buf)
{
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
return sysfs_emit(buf, "%s\n", srbds_strings[srbds_mitigation]);
}
static ssize_t retbleed_show_state(char *buf)
{
if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n");
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
return sysfs_emit(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n");
return sprintf(buf, "%s; SMT %s\n",
retbleed_strings[retbleed_mitigation],
!sched_smt_active() ? "disabled" :
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
"enabled with STIBP protection" : "vulnerable");
return sysfs_emit(buf, "%s; SMT %s\n", retbleed_strings[retbleed_mitigation],
!sched_smt_active() ? "disabled" :
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
"enabled with STIBP protection" : "vulnerable");
}
return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
}
static ssize_t gds_show_state(char *buf)
@ -2629,26 +2706,26 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
char *buf, unsigned int bug)
{
if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
return sysfs_emit(buf, "Not affected\n");
switch (bug) {
case X86_BUG_CPU_MELTDOWN:
if (boot_cpu_has(X86_FEATURE_PTI))
return sprintf(buf, "Mitigation: PTI\n");
return sysfs_emit(buf, "Mitigation: PTI\n");
if (hypervisor_is_type(X86_HYPER_XEN_PV))
return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n");
return sysfs_emit(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n");
break;
case X86_BUG_SPECTRE_V1:
return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
return sysfs_emit(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
case X86_BUG_SPECTRE_V2:
return spectre_v2_show_state(buf);
case X86_BUG_SPEC_STORE_BYPASS:
return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
return sysfs_emit(buf, "%s\n", ssb_strings[ssb_mode]);
case X86_BUG_L1TF:
if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
@ -2680,11 +2757,14 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_SRSO:
return srso_show_state(buf);
case X86_BUG_RFDS:
return rfds_show_state(buf);
default:
break;
}
return sprintf(buf, "Vulnerable\n");
return sysfs_emit(buf, "Vulnerable\n");
}
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
@ -2754,4 +2834,9 @@ ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribut
{
return cpu_show_common(dev, attr, buf, X86_BUG_SRSO);
}
ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf)
{
return cpu_show_common(dev, attr, buf, X86_BUG_RFDS);
}
#endif

View File

@ -166,7 +166,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
[GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
GDT_STACK_CANARY_INIT
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@ -600,7 +599,6 @@ void load_percpu_segment(int cpu)
__loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#endif
load_stack_canary_segment();
}
#ifdef CONFIG_X86_32
@ -1098,8 +1096,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
/* Zhaoxin Family 7 */
VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
@ -1134,6 +1132,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define SRSO BIT(5)
/* CPU is affected by GDS */
#define GDS BIT(6)
/* CPU is affected by Register File Data Sampling */
#define RFDS BIT(7)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@ -1161,14 +1161,23 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS),
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(ALDERLAKE_N, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS),
VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS),
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
VULNBL_AMD(0x17, RETBLEED | SRSO),
VULNBL_HYGON(0x18, RETBLEED),
VULNBL_HYGON(0x18, RETBLEED | SRSO),
VULNBL_AMD(0x19, SRSO),
{}
};
@ -1197,6 +1206,24 @@ static bool arch_cap_mmio_immune(u64 ia32_cap)
ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
}
static bool __init vulnerable_to_rfds(u64 ia32_cap)
{
/* The "immunity" bit trumps everything else: */
if (ia32_cap & ARCH_CAP_RFDS_NO)
return false;
/*
* VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to
* indicate that mitigation is needed because guest is running on a
* vulnerable hardware or may migrate to such hardware:
*/
if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
return true;
/* Only consult the blacklist when there is no enumeration: */
return cpu_matches(cpu_vuln_blacklist, RFDS);
}
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = x86_read_arch_cap_msr();
@ -1219,8 +1246,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
if (ia32_cap & ARCH_CAP_IBRS_ALL)
/*
* AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature
* flag and protect from vendor-specific bugs via the whitelist.
*/
if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
!(ia32_cap & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
}
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
!(ia32_cap & ARCH_CAP_MDS_NO)) {
@ -1282,11 +1317,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_RETBLEED);
}
if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) &&
!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
!(ia32_cap & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
/*
* Check if CPU is vulnerable to GDS. If running in a virtual machine on
* an affected processor, the VMM may have disabled the use of GATHER by
@ -1302,6 +1332,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SRSO);
}
if (vulnerable_to_rfds(ia32_cap))
setup_force_cpu_bug(X86_BUG_RFDS);
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@ -1937,7 +1970,8 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif /* CONFIG_X86_64 */

View File

@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_F16C, X86_FEATURE_XMM2, },
{ X86_FEATURE_AES, X86_FEATURE_XMM2 },
{ X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
{ X86_FEATURE_GFNI, X86_FEATURE_XMM2 },
{ X86_FEATURE_FMA, X86_FEATURE_AVX },
{ X86_FEATURE_VAES, X86_FEATURE_AVX },
{ X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX },
{ X86_FEATURE_AVX2, X86_FEATURE_AVX, },
{ X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
{ X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
{ X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
{ X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
{ X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },

View File

@ -2389,12 +2389,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
return -EINVAL;
b = &per_cpu(mce_banks_array, s->id)[bank];
if (!b->init)
return -ENODEV;
b->ctl = new;
mutex_lock(&mce_sysfs_mutex);
mce_restart();
mutex_unlock(&mce_sysfs_mutex);
return size;
}

View File

@ -100,9 +100,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
.ss = __KERNEL_DS,
.ds = __USER_DS,
.fs = __KERNEL_PERCPU,
#ifndef CONFIG_X86_32_LAZY_GS
.gs = __KERNEL_STACK_CANARY,
#endif
.gs = 0,
.__cr3 = __pa_nodebug(swapper_pg_dir),
},

View File

@ -302,15 +302,6 @@ unsigned long __head __startup_64(unsigned long physaddr,
return sme_get_me_mask();
}
unsigned long __startup_secondary_64(void)
{
/*
* Return the SME encryption mask (if SME is active) to be used as a
* modifier for the initial pgdir entry programmed into CR3.
*/
return sme_get_me_mask();
}
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{

View File

@ -319,8 +319,8 @@ SYM_FUNC_START(startup_32_smp)
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
movl $(__KERNEL_STACK_CANARY),%eax
movl %eax,%gs
xorl %eax,%eax
movl %eax,%gs # clear possible garbage in %gs
xorl %eax,%eax # Clear LDT
lldt %ax
@ -340,20 +340,6 @@ SYM_FUNC_END(startup_32_smp)
*/
__INIT
setup_once:
#ifdef CONFIG_STACKPROTECTOR
/*
* Configure the stack canary. The linker can't handle this by
* relocation. Manually set base address in stack canary
* segment descriptor.
*/
movl $gdt_page,%eax
movl $stack_canary,%ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
#endif
andl $0,setup_once_ref /* Once is enough, thanks */
RET

View File

@ -74,6 +74,22 @@ SYM_CODE_START_NOALIGN(startup_64)
leaq (__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
leaq _text(%rip), %rdi
/*
* initial_gs points to initial fixed_percpu_data struct with storage for
* the stack protector canary. Global pointer fixups are needed at this
* stage, so apply them as is done in fixup_pointer(), and initialize %gs
* such that the canary can be accessed at %gs:40 for subsequent C calls.
*/
movl $MSR_GS_BASE, %ecx
movq initial_gs(%rip), %rax
movq $_text, %rdx
subq %rdx, %rax
addq %rdi, %rax
movq %rax, %rdx
shrq $32, %rdx
wrmsr
pushq %rsi
call startup_64_setup_env
popq %rsi
@ -141,9 +157,11 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
* Retrieve the modifier (SME encryption mask if SME is active) to be
* added to the initial pgdir entry that will be programmed into CR3.
*/
pushq %rsi
call __startup_secondary_64
popq %rsi
#ifdef CONFIG_AMD_MEM_ENCRYPT
movq sme_me_mask, %rax
#else
xorq %rax, %rax
#endif
/* Form the CR3 value being sure to include the CR3 modifier */
addq $(init_top_pgt - __START_KERNEL_map), %rax

View File

@ -519,9 +519,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
write_cr2(this_cpu_read(nmi_cr2));
if (this_cpu_dec_return(nmi_state))
goto nmi_restart;
if (user_mode(regs))
mds_user_clear_cpu_buffers();
}
#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)

View File

@ -124,8 +124,7 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
else if (opfunc == _paravirt_ident_64)
ret = paravirt_patch_ident_64(insn_buff, len);
else if (type == PARAVIRT_PATCH(cpu.iret) ||
type == PARAVIRT_PATCH(cpu.usergs_sysret64))
else if (type == PARAVIRT_PATCH(cpu.iret))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
#endif
@ -159,7 +158,6 @@ static u64 native_steal_clock(int cpu)
/* These are in entry.S */
extern void native_iret(void);
extern void native_usergs_sysret64(void);
static struct resource reserve_ioports = {
.start = 0,
@ -299,7 +297,6 @@ struct paravirt_patch_template pv_ops = {
.cpu.load_sp0 = native_load_sp0,
.cpu.usergs_sysret64 = native_usergs_sysret64,
.cpu.iret = native_iret,
#ifdef CONFIG_X86_IOPL_IOPERM

View File

@ -27,7 +27,6 @@ struct patch_xxl {
const unsigned char mmu_write_cr3[3];
const unsigned char irq_restore_fl[2];
const unsigned char cpu_wbinvd[2];
const unsigned char cpu_usergs_sysret64[6];
const unsigned char mov64[3];
};
@ -40,8 +39,6 @@ static const struct patch_xxl patch_data_xxl = {
.mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
.irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq
.cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
.cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8,
0x48, 0x0f, 0x07 }, // swapgs; sysretq
.mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
};
@ -83,7 +80,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len);
PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
#endif

View File

@ -224,7 +224,6 @@ void __init setup_per_cpu_areas(void)
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);
setup_stack_canary_segment(cpu);
/*
* Copy data used in early init routines from the
* initial arrays to the per cpu data areas. These

View File

@ -164,17 +164,11 @@ int do_set_thread_area(struct task_struct *p, int idx,
savesegment(fs, sel);
if (sel == modified_sel)
loadsegment(fs, sel);
#endif
savesegment(gs, sel);
if (sel == modified_sel)
load_gs_index(sel);
#endif
#ifdef CONFIG_X86_32_LAZY_GS
savesegment(gs, sel);
if (sel == modified_sel)
loadsegment(gs, sel);
#endif
} else {
#ifdef CONFIG_X86_64
if (p->thread.fsindex == modified_sel)

View File

@ -76,10 +76,12 @@ static const struct cpuid_reg reverse_cpuid[] = {
*/
static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_5);
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
}

View File

@ -1024,20 +1024,22 @@ int svm_register_enc_region(struct kvm *kvm,
goto e_free;
}
/*
* The guest may change the memory encryption attribute from C=0 -> C=1
* or vice versa for this memory range. Lets make sure caches are
* flushed to ensure that guest data gets written into memory with
* correct C-bit. Note, this must be done before dropping kvm->lock,
* as region and its array of pages can be freed by a different task
* once kvm->lock is released.
*/
sev_clflush_pages(region->pages, region->npages);
region->uaddr = range->addr;
region->size = range->size;
list_add_tail(&region->list, &sev->regions_list);
mutex_unlock(&kvm->lock);
/*
* The guest may change the memory encryption attribute from C=0 -> C=1
* or vice versa for this memory range. Lets make sure caches are
* flushed to ensure that guest data gets written into memory with
* correct C-bit.
*/
sev_clflush_pages(region->pages, region->npages);
return ret;
e_free:

View File

@ -2,7 +2,10 @@
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H
#define VMX_RUN_VMRESUME (1 << 0)
#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
#define VMX_RUN_VMRESUME_SHIFT 0
#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */

View File

@ -77,7 +77,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
testb $VMX_RUN_VMRESUME, %bl
bt $VMX_RUN_VMRESUME_SHIFT, %bx
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@ -99,8 +99,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
/* Check EFLAGS.ZF from 'testb' above */
jz .Lvmlaunch
/* Clobbers EFLAGS.ZF */
CLEAR_CPU_BUFFERS
/* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
jnc .Lvmlaunch
/*
* After a successful VMRESUME/VMLAUNCH, control flow "magically"

View File

@ -397,7 +397,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
vmx_fb_clear_ctrl_available;
/*
* If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@ -6792,11 +6793,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
/* L1D Flush includes CPU buffer clear to mitigate MDS */
/*
* L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
* mitigation for MDS is done late in VMentry and is still
* executed in spite of L1D Flush. This is because an extra VERW
* should not matter much after the big hammer L1D Flush.
*/
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mds_user_clear))
mds_clear_cpu_buffers();
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
mds_clear_cpu_buffers();

View File

@ -1389,7 +1389,8 @@ static unsigned int num_msr_based_features;
ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
static u64 kvm_get_arch_capabilities(void)
{
@ -1426,6 +1427,8 @@ static u64 kvm_get_arch_capabilities(void)
data |= ARCH_CAP_SSB_NO;
if (!boot_cpu_has_bug(X86_BUG_MDS))
data |= ARCH_CAP_MDS_NO;
if (!boot_cpu_has_bug(X86_BUG_RFDS))
data |= ARCH_CAP_RFDS_NO;
if (!boot_cpu_has(X86_FEATURE_RTM)) {
/*
@ -8498,13 +8501,20 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
static void kvm_inject_exception(struct kvm_vcpu *vcpu)
{
/*
* Suppress the error code if the vCPU is in Real Mode, as Real Mode
* exceptions don't report error codes. The presence of an error code
* is carried with the exception and only stripped when the exception
* is injected as intercepted #PF VM-Exits for AMD's Paged Real Mode do
* report an error code despite the CPU being in Real Mode.
*/
vcpu->arch.exception.has_error_code &= is_protmode(vcpu);
trace_kvm_inj_exception(vcpu->arch.exception.nr,
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code,
vcpu->arch.exception.injected);
if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
vcpu->arch.exception.error_code = false;
kvm_x86_ops.queue_exception(vcpu);
}

View File

@ -404,10 +404,6 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx)
case INAT_SEG_REG_FS:
return (unsigned short)(regs->fs & 0xffff);
case INAT_SEG_REG_GS:
/*
* GS may or may not be in regs as per CONFIG_X86_32_LAZY_GS.
* The macro below takes care of both cases.
*/
return get_user_gs(regs);
case INAT_SEG_REG_IGNORE:
default:

View File

@ -108,6 +108,7 @@ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ret
int3
SYM_FUNC_END(srso_alias_untrain_ret)
__EXPORT_THUNK(srso_alias_untrain_ret)
#endif
SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
@ -249,9 +250,7 @@ SYM_CODE_START(srso_return_thunk)
SYM_CODE_END(srso_return_thunk)
SYM_FUNC_START(entry_untrain_ret)
ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
"jmp srso_untrain_ret", X86_FEATURE_SRSO, \
"jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
ALTERNATIVE "jmp retbleed_untrain_ret", "jmp srso_untrain_ret", X86_FEATURE_SRSO
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
@ -259,6 +258,7 @@ SYM_CODE_START(__x86_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
ANNOTATE_UNRET_SAFE
ANNOTATE_NOENDBR
ret
int3
SYM_CODE_END(__x86_return_thunk)

Some files were not shown because too many files have changed in this diff Show More