Merge remote-tracking branch 'keystone/mirror-android14-6.1-2024-01' into HEAD

* keystone/mirror-android14-6.1-2024-01: (139 commits)
  ANDROID: Update the pixel symbol list
  BACKPORT: UPSTREAM: phy: qcom-qmp: Introduce Kconfig symbols for discrete drivers
  ANDROID: GKI: add symbols of vendor hooks to ABI for swapping in ahead
  ANDROID: GKI: add vendor hooks for swapping in ahead
  ANDROID: add 16k targets for Microdroid kernel
  FROMGIT: BACKPORT: mm/cma: fix placement of trace_cma_alloc_start/finish
  FROMGIT: wifi: nl80211: Extend del pmksa support for SAE and OWE security
  ANDROID: Update the ABI symbol list
  ANDROID: mm: export dump_tasks symbol.
  FROMLIST: scsi: ufs: Remove the ufshcd_hba_exit() call from ufshcd_async_scan()
  FROMLIST: scsi: ufs: Simplify power management during async scan
  ANDROID: gki_defconfig: Set CONFIG_IDLE_INJECT and CONFIG_CPU_IDLE_THERMAL into y
  ANDROID: KMI workaround for CONFIG_NETFILTER_FAMILY_BRIDGE
  ANDROID: dma-buf: don't re-purpose kobject as work_struct
  BACKPORT: FROMLIST: dma-buf: Move sysfs work out of DMA-BUF export path
  UPSTREAM: netfilter: nf_tables: skip set commit for deleted/destroyed sets
  ANDROID: KVM: arm64: Avoid BUG-ing from the host abort path
  ANDROID: Update the ABI symbol list
  UPSTREAM: ipv4: igmp: fix refcnt uaf issue when receiving igmp query packet
  UPSTREAM: nvmet-tcp: Fix a possible UAF in queue intialization setup
  ...

Change-Id: I98b5a6f1ce746fb3fca8a1ff49d84914dd98e25a
Signed-off-by: Omkar Sai Sandeep Katadi <okatadi@google.com>
Commit a7a9bfdae3 by Omkar Sai Sandeep Katadi, 2024-01-30 19:48:42 +00:00
116 files changed, 5595 insertions(+), 2322 deletions(-)


@ -198,6 +198,34 @@ copy_to_dist_dir(
log = "info",
)
kernel_build(
name = "kernel_aarch64_microdroid_16k",
srcs = ["//common:kernel_aarch64_sources"],
outs = [
"Image",
"System.map",
"modules.builtin",
"modules.builtin.modinfo",
"vmlinux",
"vmlinux.symvers",
],
build_config = "build.config.microdroid.aarch64",
make_goals = [
"Image",
],
page_size = "16k",
)
copy_to_dist_dir(
name = "kernel_aarch64_microdroid_16k_dist",
data = [
":kernel_aarch64_microdroid_16k",
],
dist_dir = "out/kernel_aarch64_microdroid_16k/dist",
flat = True,
log = "info",
)
# Microdroid is not a real device. The kernel image is built with special
# configs to reduce the size. Hence, not using mixed build.
kernel_build(


@ -81,6 +81,9 @@ section.
Sometimes it is necessary to ensure the next call to store to a maple tree does
not allocate memory; please see :ref:`maple-tree-advanced-api` for this use case.
You can use mtree_dup() to duplicate an entire maple tree. This is more
efficient than inserting all entries one by one into a new tree.
Finally, you can remove all entries from a maple tree by calling
mtree_destroy(). If the maple tree entries are pointers, you may wish to free
the entries first.
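
A minimal sketch of the duplicate-then-destroy flow described above (illustrative only, not part of this change; it assumes the in-tree prototypes mtree_store(), mtree_dup(old, new, gfp), mt_for_each(), and mtree_destroy()):

#include <linux/maple_tree.h>
#include <linux/slab.h>

static DEFINE_MTREE(old_mt);
static DEFINE_MTREE(new_mt);

static int maple_dup_example(void)
{
        unsigned long index = 0;
        void *item = kmalloc(32, GFP_KERNEL);
        void *entry;
        int ret;

        if (!item)
                return -ENOMEM;

        ret = mtree_store(&old_mt, 42, item, GFP_KERNEL);
        if (ret) {
                kfree(item);
                return ret;
        }

        /* One call instead of re-inserting every entry into new_mt. */
        ret = mtree_dup(&old_mt, &new_mt, GFP_KERNEL);
        if (!ret)
                mtree_destroy(&new_mt); /* both trees hold the same pointers */

        /* The entries are pointers, so free them before destroying the tree. */
        mt_for_each(&old_mt, entry, index, ULONG_MAX)
                kfree(entry);
        mtree_destroy(&old_mt);
        return ret;
}
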
@ -112,6 +115,7 @@ Takes ma_lock internally:
* mtree_insert()
* mtree_insert_range()
* mtree_erase()
* mtree_dup()
* mtree_destroy()
* mt_set_in_rcu()
* mt_clear_in_rcu()

File diff suppressed because it is too large.


@ -274,6 +274,8 @@
sched_clock
sched_show_task
scnprintf
scsi_device_quiesce
scsi_device_resume
seq_hex_dump
seq_lseek
seq_printf


@ -1025,6 +1025,7 @@
iio_trigger_poll_chained
iio_trigger_register
iio_trigger_unregister
iio_trigger_using_own
import_iovec
in4_pton
inet_csk_get_port


@ -158,6 +158,7 @@
__traceiter_android_vh_dm_bufio_shrink_scan_bypass
__traceiter_android_vh_mutex_unlock_slowpath
__traceiter_android_vh_rtmutex_waiter_prio
__traceiter_android_vh_rt_mutex_steal
__traceiter_android_vh_rwsem_can_spin_on_owner
__traceiter_android_vh_rwsem_opt_spin_finish
__traceiter_android_vh_rwsem_opt_spin_start
@ -258,6 +259,7 @@
__tracepoint_android_vh_record_rtmutex_lock_starttime
__tracepoint_android_vh_record_rwsem_lock_starttime
__tracepoint_android_vh_rtmutex_waiter_prio
__tracepoint_android_vh_rt_mutex_steal
__tracepoint_android_vh_rwsem_can_spin_on_owner
__tracepoint_android_vh_rwsem_opt_spin_finish
__tracepoint_android_vh_rwsem_opt_spin_start


@ -3,8 +3,11 @@
add_cpu
add_timer
add_timer_on
add_uevent_var
add_wait_queue
adjust_managed_page_count
aes_encrypt
aes_expandkey
alarm_cancel
alarm_init
alarm_start_relative
@ -19,6 +22,7 @@
__alloc_percpu
__alloc_percpu_gfp
__alloc_skb
alloc_skb_with_frags
alloc_workqueue
alt_cb_patch_nops
amba_bustype
@ -188,6 +192,7 @@
clockevents_config_and_register
clocks_calc_mult_shift
__clocksource_register_scale
__cma_alloc
cma_alloc
cma_for_each_area
cma_get_name
@ -242,6 +247,7 @@
cpufreq_get_policy
cpufreq_policy_transition_delay_us
cpufreq_quick_get
cpufreq_quick_get_max
cpufreq_register_driver
cpufreq_register_governor
cpufreq_register_notifier
@ -260,6 +266,7 @@
cpu_hwcaps
cpuidle_driver_state_disabled
cpuidle_get_driver
cpuidle_governor_latency_req
cpu_latency_qos_add_request
cpu_latency_qos_remove_request
cpu_latency_qos_update_request
@ -275,6 +282,7 @@
cpus_read_lock
cpus_read_unlock
cpu_subsys
cpu_topology
crc32_be
crc32_le
crc8
@ -297,6 +305,7 @@
crypto_register_shash
crypto_req_done
crypto_shash_digest
crypto_shash_final
crypto_shash_finup
crypto_shash_setkey
crypto_shash_update
@ -310,10 +319,12 @@
csum_partial
csum_tcpudp_nofold
_ctype
datagram_poll
deactivate_task
debugfs_attr_read
debugfs_attr_write
debugfs_create_atomic_t
debugfs_create_blob
debugfs_create_bool
debugfs_create_devm_seqfile
debugfs_create_dir
@ -339,6 +350,7 @@
desc_to_gpio
destroy_workqueue
dev_addr_mod
_dev_alert
dev_alloc_name
__dev_change_net_namespace
dev_close
@ -497,7 +509,11 @@
dev_pm_opp_of_remove_table
dev_pm_opp_put
dev_pm_opp_set_config
dev_pm_qos_add_notifier
dev_pm_qos_add_request
dev_pm_qos_read_value
dev_pm_qos_remove_notifier
dev_pm_qos_remove_request
dev_pm_qos_update_request
_dev_printk
dev_printk_emit
@ -538,20 +554,28 @@
dma_buf_unmap_attachment
dma_buf_vmap
dma_buf_vunmap
dma_direct_alloc
dma_direct_free
dmaengine_unmap_put
dma_fence_add_callback
dma_fence_array_create
dma_fence_array_ops
dma_fence_context_alloc
dma_fence_default_wait
dma_fence_enable_sw_signaling
dma_fence_get_status
dma_fence_init
dma_fence_release
dma_fence_remove_callback
dma_fence_signal
dma_fence_signal_locked
dma_fence_unwrap_first
__dma_fence_unwrap_merge
dma_fence_unwrap_next
dma_fence_wait_timeout
dma_free_attrs
dma_free_pages
dma_get_sgtable_attrs
dma_get_slave_caps
dma_get_slave_channel
dma_heap_add
@ -733,6 +757,7 @@
drm_kms_helper_poll_fini
drm_kms_helper_poll_init
drm_match_cea_mode
__drmm_crtc_alloc_with_planes
drmm_kmalloc
drmm_mode_config_init
drm_mode_config_reset
@ -805,6 +830,7 @@
drm_writeback_signal_completion
dump_backtrace
dump_stack
dump_tasks
dw_handle_msi_irq
dw_pcie_find_capability
dw_pcie_host_init
@ -856,6 +882,7 @@
find_task_by_vpid
find_vma_intersection
finish_wait
firmware_request_nowarn
flush_dcache_page
flush_delayed_work
flush_work
@ -1028,6 +1055,7 @@
ida_destroy
ida_free
idr_alloc
idr_alloc_cyclic
idr_destroy
idr_find
idr_for_each
@ -1047,6 +1075,7 @@
in6_pton
in_aton
inc_zone_page_state
in_egroup_p
inet_csk_get_port
init_dummy_netdev
init_iova_domain
@ -1056,6 +1085,7 @@
__init_swait_queue_head
init_task
init_timer_key
init_user_ns
init_uts_ns
init_wait_entry
__init_waitqueue_head
@ -1123,8 +1153,10 @@
io_schedule_timeout
iounmap
iova_domain_init_rcaches
iov_iter_revert
ip_compute_csum
ip_send_check
__ipv6_addr_type
__irq_alloc_descs
__irq_apply_affinity_hint
irq_create_mapping_affinity
@ -1156,12 +1188,20 @@
jiffies_to_usecs
kasan_flag_enabled
kasprintf
kernel_bind
kernel_connect
kernel_cpustat
kernel_getsockname
kernel_kobj
kernel_param_lock
kernel_param_unlock
kernel_recvmsg
kernel_restart
kernel_sendmsg
kernfs_find_and_get_ns
kernfs_notify
kernfs_path_from_node
kernfs_put
key_create_or_update
key_put
keyring_alloc
@ -1184,6 +1224,7 @@
kmalloc_trace
kmem_cache_alloc
kmem_cache_create
kmem_cache_create_usercopy
kmem_cache_destroy
kmem_cache_free
kmemdup
@ -1198,6 +1239,7 @@
kobject_uevent_env
kobj_sysfs_ops
krealloc
ksize
kstat
kstrdup
kstrndup
@ -1262,6 +1304,7 @@
__list_del_entry_valid
list_sort
__local_bh_enable_ip
lock_sock_nested
log_abnormal_wakeup_reason
log_post_read_mmio
log_post_write_mmio
@ -1287,6 +1330,12 @@
memdup_user_nul
memmove
memparse
mempool_alloc
mempool_alloc_slab
mempool_create
mempool_destroy
mempool_free
mempool_free_slab
memremap
mem_section
memset
@ -1338,6 +1387,7 @@
napi_complete_done
napi_disable
napi_enable
napi_gro_flush
napi_gro_receive
__napi_schedule
napi_schedule_prep
@ -1354,7 +1404,9 @@
netif_receive_skb
netif_receive_skb_list
netif_rx
netif_tx_lock
netif_tx_stop_all_queues
netif_tx_unlock
netif_tx_wake_queue
netlink_broadcast
__netlink_kernel_create
@ -1381,6 +1433,7 @@
nr_cpu_ids
nr_irqs
ns_capable
ns_capable_noaudit
nsec_to_clock_t
ns_to_timespec64
__num_online_cpus
@ -1482,6 +1535,7 @@
panic_notifier_list
param_array_ops
param_get_int
param_get_string
param_ops_bool
param_ops_byte
param_ops_charp
@ -1490,10 +1544,14 @@
param_ops_string
param_ops_uint
param_ops_ulong
param_set_copystring
param_set_int
pci_alloc_irq_vectors_affinity
pci_assign_resource
pci_clear_master
pci_disable_device
pci_disable_msi
pcie_capability_read_word
pci_enable_device
pci_enable_wake
pci_find_bus
@ -1501,6 +1559,9 @@
pci_find_ext_capability
pci_free_irq_vectors
pci_get_device
pci_iomap
pci_iounmap
pci_irq_vector
pci_load_and_free_saved_state
pci_load_saved_state
pci_msi_mask_irq
@ -1508,7 +1569,9 @@
pci_read_config_dword
pci_read_config_word
__pci_register_driver
pci_release_region
pci_release_regions
pci_request_region
pci_rescan_bus
pci_restore_msi_state
pci_restore_state
@ -1606,6 +1669,7 @@
__pm_runtime_use_autosuspend
__pm_stay_awake
pm_stay_awake
pm_system_wakeup
pm_wakeup_dev_event
pm_wakeup_ws_event
power_supply_changed
@ -1640,6 +1704,8 @@
proc_remove
proc_set_size
proc_symlink
proto_register
proto_unregister
pskb_expand_head
__pskb_pull_tail
___pskb_trim
@ -1660,7 +1726,9 @@
radix_tree_delete_item
radix_tree_gang_lookup
radix_tree_insert
radix_tree_iter_delete
radix_tree_lookup
radix_tree_next_chunk
radix_tree_preload
___ratelimit
raw_notifier_call_chain
@ -1668,9 +1736,11 @@
raw_notifier_chain_unregister
_raw_read_lock
_raw_read_lock_bh
_raw_read_lock_irq
_raw_read_lock_irqsave
_raw_read_unlock
_raw_read_unlock_bh
_raw_read_unlock_irq
_raw_read_unlock_irqrestore
_raw_spin_lock
_raw_spin_lock_bh
@ -1684,9 +1754,11 @@
_raw_spin_unlock_irq
_raw_spin_unlock_irqrestore
_raw_write_lock
_raw_write_lock_bh
_raw_write_lock_irq
_raw_write_lock_irqsave
_raw_write_unlock
_raw_write_unlock_bh
_raw_write_unlock_irq
_raw_write_unlock_irqrestore
rb_erase
@ -1701,6 +1773,7 @@
rdev_get_drvdata
rdev_get_id
reboot_mode
refcount_dec_not_one
refcount_warn_saturate
__refrigerator
regcache_cache_only
@ -1718,6 +1791,7 @@
register_netdev
register_netdevice
register_netdevice_notifier
register_netevent_notifier
register_oom_notifier
register_pernet_device
register_pernet_subsys
@ -1760,11 +1834,13 @@
regulator_notifier_call_chain
regulator_put
regulator_set_active_discharge_regmap
regulator_set_load
regulator_set_voltage
regulator_set_voltage_sel_regmap
regulator_unregister
release_firmware
__release_region
release_sock
remap_pfn_range
remap_vmalloc_range
remove_cpu
@ -1865,6 +1941,7 @@
seq_read
seq_release
seq_release_private
seq_vprintf
seq_write
set_capacity
set_capacity_and_notify
@ -1905,20 +1982,25 @@
single_open
single_open_size
single_release
sk_alloc
skb_add_rx_frag
skb_checksum
skb_checksum_help
skb_clone
skb_clone_sk
skb_coalesce_rx_frag
skb_complete_wifi_ack
skb_copy
skb_copy_bits
skb_copy_datagram_iter
skb_copy_expand
skb_dequeue
skb_dequeue_tail
skb_ensure_writable
skb_free_datagram
__skb_get_hash
__skb_gso_segment
__skb_pad
skb_pull
skb_push
skb_put
@ -1926,7 +2008,11 @@
skb_queue_purge
skb_queue_tail
skb_realloc_headroom
skb_recv_datagram
skb_set_owner_w
skb_store_bits
skb_trim
sk_free
skip_spaces
smp_call_function
smp_call_function_single
@ -2003,8 +2089,22 @@
snd_soc_unregister_component
snprintf
soc_device_register
sock_alloc_send_pskb
__sock_create
sock_create_kern
sock_gettstamp
sock_init_data
sock_no_accept
sock_no_listen
sock_no_mmap
sock_no_sendpage
sock_no_shutdown
sock_no_socketpair
sock_queue_rcv_skb_reason
sock_register
sock_release
sock_setsockopt
sock_unregister
sock_wfree
softnet_data
sort
@ -2042,6 +2142,7 @@
strcasecmp
strcat
strchr
strchrnul
strcmp
strcpy
strcspn
@ -2081,7 +2182,9 @@
synchronize_rcu
syscon_regmap_lookup_by_phandle
sysctl_sched_features
sysctl_sched_idle_min_granularity
sysctl_sched_latency
sysctl_sched_min_granularity
sysfs_add_file_to_group
sysfs_add_link_to_group
sysfs_create_file_ns
@ -2149,6 +2252,7 @@
thermal_zone_get_temp
thermal_zone_get_zone_by_name
thread_group_cputime_adjusted
tick_nohz_get_idle_calls_cpu
time64_to_tm
topology_update_thermal_pressure
_totalram_pages
@ -2208,6 +2312,7 @@
__traceiter_android_vh_dup_task_struct
__traceiter_android_vh_early_resume_begin
__traceiter_android_vh_enable_thermal_genl_check
__traceiter_android_vh_ep_create_wakeup_source
__traceiter_android_vh_filemap_get_folio
__traceiter_android_vh_ipi_stop
__traceiter_android_vh_meminfo_proc_show
@ -2221,6 +2326,7 @@
__traceiter_android_vh_setscheduler_uclamp
__traceiter_android_vh_si_meminfo_adjust
__traceiter_android_vh_sysrq_crash
__traceiter_android_vh_timerfd_create
__traceiter_android_vh_typec_store_partner_src_caps
__traceiter_android_vh_typec_tcpci_override_toggling
__traceiter_android_vh_typec_tcpm_get_timer
@ -2315,6 +2421,7 @@
__tracepoint_android_vh_dup_task_struct
__tracepoint_android_vh_early_resume_begin
__tracepoint_android_vh_enable_thermal_genl_check
__tracepoint_android_vh_ep_create_wakeup_source
__tracepoint_android_vh_filemap_get_folio
__tracepoint_android_vh_ipi_stop
__tracepoint_android_vh_meminfo_proc_show
@ -2328,6 +2435,7 @@
__tracepoint_android_vh_setscheduler_uclamp
__tracepoint_android_vh_si_meminfo_adjust
__tracepoint_android_vh_sysrq_crash
__tracepoint_android_vh_timerfd_create
__tracepoint_android_vh_typec_store_partner_src_caps
__tracepoint_android_vh_typec_tcpci_override_toggling
__tracepoint_android_vh_typec_tcpm_get_timer
@ -2437,6 +2545,7 @@
unregister_netdevice_many
unregister_netdevice_notifier
unregister_netdevice_queue
unregister_netevent_notifier
unregister_oom_notifier
unregister_pernet_device
unregister_pernet_subsys
@ -2585,6 +2694,7 @@
vring_del_virtqueue
vring_interrupt
vring_new_virtqueue
vscnprintf
vsnprintf
vunmap
vzalloc
@ -2592,6 +2702,7 @@
wait_for_completion
wait_for_completion_interruptible
wait_for_completion_interruptible_timeout
wait_for_completion_killable
wait_for_completion_timeout
wait_woken
__wake_up
@ -2609,6 +2720,7 @@
watchdog_set_restart_priority
watchdog_unregister_device
wireless_nlevent_flush
wireless_send_event
woken_wake_function
work_busy
__write_overflow_field
@ -2620,11 +2732,13 @@
xa_find
xa_find_after
xa_get_mark
__xa_insert
xa_load
xa_set_mark
xas_find
xas_pause
__xa_store
xa_store
__xfrm_state_destroy
xfrm_state_lookup_byspi
xfrm_stateonly_find


@ -2,6 +2,7 @@
# commonly used symbols
add_timer
alloc_chrdev_region
alloc_etherdev_mqs
alloc_iova_fast
__alloc_pages
__alloc_skb
@ -827,9 +828,25 @@
param_ops_int
param_ops_string
param_ops_uint
param_ops_ulong
pci_disable_device
pci_disable_link_state
pcie_capability_clear_and_set_word
pci_find_capability
pcim_enable_device
pcim_iomap_table
pcim_pin_device
pci_read_config_byte
pci_read_config_dword
pci_read_config_word
__pci_register_driver
pci_restore_state
pci_save_state
pci_set_master
pci_set_power_state
pci_unregister_driver
pci_write_config_dword
pci_write_config_word
__per_cpu_offset
perf_trace_buf_alloc
perf_trace_run_bpf_submit
@ -1023,7 +1040,11 @@
sched_set_fifo
schedule
schedule_timeout
schedule_timeout_uninterruptible
scnprintf
scsi_command_size_tbl
scsi_device_get
scsi_device_put
__sdhci_add_host
sdhci_cleanup_host
sdhci_enable_clk
@ -1247,6 +1268,15 @@
usb_submit_urb
__usecs_to_jiffies
usleep_range_state
__v4l2_async_nf_add_fwnode_remote
v4l2_async_nf_cleanup
v4l2_async_nf_init
v4l2_async_nf_parse_fwnode_endpoints
v4l2_async_nf_register
v4l2_async_register_subdev
v4l2_async_register_subdev_sensor
v4l2_async_subdev_nf_register
v4l2_async_unregister_subdev
v4l2_ctrl_find
v4l2_ctrl_g_ctrl
v4l2_ctrl_g_ctrl_int64
@ -1274,6 +1304,9 @@
v4l2_event_subscribe
v4l2_event_unsubscribe
v4l2_fh_open
v4l2_fwnode_endpoint_alloc_parse
v4l2_fwnode_endpoint_free
v4l2_fwnode_endpoint_parse
v4l2_i2c_subdev_init
v4l2_match_dv_timings
v4l2_pipeline_link_notify
@ -1325,6 +1358,7 @@
vunmap
vzalloc
wait_for_completion
wait_for_completion_interruptible
wait_for_completion_timeout
__wake_up
wake_up_process
@ -1346,15 +1380,23 @@
skcipher_walk_aead_decrypt
skcipher_walk_aead_encrypt
# required by ahci.ko
pci_alloc_irq_vectors_affinity
pci_free_irq_vectors
pci_intx
pci_irq_vector
pci_match_id
pcim_iomap_regions_request_all
sysfs_add_file_to_group
sysfs_remove_file_from_group
# required by analogix_dp.ko
drm_atomic_get_old_connector_for_encoder
# required by aspm_ext.ko
pci_find_capability
pci_find_ext_capability
# required by bcmdhd.ko
alloc_etherdev_mqs
cpu_bit_bitmap
down_interruptible
down_timeout
@ -1873,6 +1915,60 @@
# required by ledtrig-heartbeat.ko
avenrun
# required by libahci.ko
__printk_ratelimit
# required by libahci_platform.ko
reset_control_rearm
# required by libata.ko
async_schedule_node
async_synchronize_cookie
attribute_container_register
attribute_container_unregister
autoremove_wake_function
blk_abort_request
blk_queue_max_hw_sectors
blk_queue_max_segments
blk_queue_update_dma_alignment
blk_queue_update_dma_pad
glob_match
pci_bus_type
pcim_iomap_regions
prepare_to_wait
__scsi_add_device
scsi_add_host_with_dma
scsi_build_sense
scsi_change_queue_depth
scsi_check_sense
scsi_device_set_state
scsi_done
scsi_eh_finish_cmd
scsi_eh_flush_done_q
scsi_execute_cmd
__scsi_format_command
scsi_host_alloc
scsi_host_put
scsi_remove_device
scsi_remove_host
scsi_rescan_device
scsi_schedule_eh
scsi_sense_desc_find
scsi_set_sense_field_pointer
scsi_set_sense_information
sdev_evt_send_simple
system_entering_hibernation
trace_seq_printf
trace_seq_putc
transport_add_device
transport_class_register
transport_class_unregister
transport_configure_device
transport_destroy_device
transport_remove_device
transport_setup_device
vscnprintf
# required by mac80211.ko
alloc_netdev_mqs
__alloc_percpu_gfp
@ -2787,9 +2883,11 @@
# required by video_rkcif.ko
media_entity_setup_link
__v4l2_async_nf_add_fwnode
# required by video_rkisp.ko
param_ops_ullong
v4l2_async_nf_unregister
v4l2_ctrl_poll
# required by videobuf2-cma-sg.ko


@ -341,3 +341,7 @@
#required by zram.ko
bioset_init
bioset_exit
#required by mi_asap.ko
__traceiter_android_vh_read_pages
__tracepoint_android_vh_read_pages


@ -642,7 +642,6 @@ config ARM64_WORKAROUND_REPEAT_TLBI
config ARM64_ERRATUM_2441007
bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
default y
select ARM64_WORKAROUND_REPEAT_TLBI
help
This option adds a workaround for ARM Cortex-A55 erratum #2441007.
@ -881,7 +880,6 @@ config ARM64_ERRATUM_2224489
config ARM64_ERRATUM_2441009
bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
default y
select ARM64_WORKAROUND_REPEAT_TLBI
help
This option adds a workaround for ARM Cortex-A510 erratum #2441009.


@ -123,6 +123,9 @@ CONFIG_ANON_VMA_NAME=y
CONFIG_USERFAULTFD=y
CONFIG_LRU_GEN=y
CONFIG_LRU_GEN_ENABLED=y
CONFIG_DAMON=y
CONFIG_DAMON_VADDR=y
CONFIG_DAMON_SYSFS=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@ -428,6 +431,7 @@ CONFIG_THERMAL_WRITABLE_TRIPS=y
CONFIG_THERMAL_GOV_USER_SPACE=y
CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
CONFIG_CPU_THERMAL=y
CONFIG_CPU_IDLE_THERMAL=y
CONFIG_DEVFREQ_THERMAL=y
CONFIG_THERMAL_EMULATION=y
CONFIG_WATCHDOG=y
@ -577,6 +581,7 @@ CONFIG_IIO_TRIGGER=y
CONFIG_PWM=y
CONFIG_GENERIC_PHY=y
CONFIG_POWERCAP=y
CONFIG_IDLE_INJECT=y
CONFIG_ANDROID_BINDER_IPC=y
CONFIG_ANDROID_BINDERFS=y
CONFIG_ANDROID_DEBUG_SYMBOLS=y


@ -53,7 +53,7 @@ HYP_EVENT(host_smc,
__entry->id = id;
__entry->forwarded = forwarded;
),
HE_PRINTK("id=%llu invalid=%u",
HE_PRINTK("id=%llu forwarded=%u",
__entry->id, __entry->forwarded)
);


@ -16,7 +16,7 @@ struct hyp_entry_hdr {
* Hyp events definitions common to the hyp and the host
*/
#define HYP_EVENT_FORMAT(__name, __struct) \
struct trace_hyp_format_##__name { \
struct __packed trace_hyp_format_##__name { \
struct hyp_entry_hdr hdr; \
__struct \
}


@ -72,6 +72,11 @@ enum pkvm_psci_notification {
* @register_host_perm_fault_handler), otherwise
* pKVM will be unable to handle this fault and the
* CPU will be stuck in an infinite loop.
* @host_stage2_mod_prot_range: Similar to @host_stage2_mod_prot, but takes a
* range as an argument (@nr_pages). This
* considerably speeds up the process for a
* contiguous memory region, compared to the
* per-page @host_stage2_mod_prot.
* @host_stage2_get_leaf: Query the host's stage2 page-table entry for
* the page @phys.
* @register_host_smc_handler: @cb is called whenever the host issues an SMC
@ -153,7 +158,8 @@ struct pkvm_module_ops {
void* (*hyp_va)(phys_addr_t phys);
unsigned long (*kern_hyp_va)(unsigned long x);
ANDROID_KABI_RESERVE(1);
ANDROID_KABI_USE(1, int (*host_stage2_mod_prot_range)(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages));
ANDROID_KABI_RESERVE(2);
ANDROID_KABI_RESERVE(3);
ANDROID_KABI_RESERVE(4);
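
A hypothetical sketch (not part of this change) of how an out-of-tree pKVM hypervisor module could prefer the new range callback over the per-page hook; the demo_* names are illustrative, and it assumes the module's hyp-init callback receives the ops table:

#include <asm/kvm_pkvm_module.h>

static const struct pkvm_module_ops *mod_ops;

/* Change protection on a physically contiguous region of nr_pages pages. */
static int demo_change_prot(u64 pfn, u64 nr_pages, enum kvm_pgtable_prot prot)
{
        /* One stage-2 update for the whole range when the callback is wired up. */
        if (mod_ops->host_stage2_mod_prot_range)
                return mod_ops->host_stage2_mod_prot_range(pfn, prot, nr_pages);

        /* Fallback: the original per-page hook, one call per page. */
        while (nr_pages--) {
                int ret = mod_ops->host_stage2_mod_prot(pfn++, prot);

                if (ret)
                        return ret;
        }
        return 0;
}

int demo_hyp_init(const struct pkvm_module_ops *ops)
{
        mod_ops = ops;
        return 0;
}
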


@ -104,6 +104,7 @@ int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
struct kvm_hyp_memcache *host_mc);
int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot);
int module_change_host_page_prot_range(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages);
void destroy_hyp_vm_pgt(struct pkvm_hyp_vm *vm);
void drain_hyp_pool(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc);


@ -149,22 +149,16 @@ static void prepare_host_vtcr(void)
static int prepopulate_host_stage2(void)
{
struct memblock_region *reg;
u64 addr = 0;
int i, ret;
int i, ret = 0;
for (i = 0; i < hyp_memblock_nr; i++) {
reg = &hyp_memory[i];
ret = host_stage2_idmap_locked(addr, reg->base - addr, PKVM_HOST_MMIO_PROT, false);
if (ret)
return ret;
ret = host_stage2_idmap_locked(reg->base, reg->size, PKVM_HOST_MEM_PROT, false);
if (ret)
return ret;
addr = reg->base + reg->size;
}
return host_stage2_idmap_locked(addr, BIT(host_mmu.pgt.ia_bits) - addr, PKVM_HOST_MMIO_PROT,
false);
return ret;
}
int kvm_host_prepare_stage2(void *pgt_pool_base)
@ -881,7 +875,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
int ret = -EPERM;
esr = read_sysreg_el2(SYS_ESR);
BUG_ON(!__get_fault_info(esr, &fault));
if (!__get_fault_info(esr, &fault)) {
addr = (u64)-1;
/*
* We've presumably raced with a page-table change which caused
* AT to fail; try again.
*/
goto return_to_host;
}
fault.esr_el2 = esr;
addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
@ -908,6 +909,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
else
BUG_ON(ret && ret != -EAGAIN);
return_to_host:
trace_host_mem_abort(esr, addr);
}
@ -2008,77 +2010,80 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
return ret;
}
static int restrict_host_page_perms(u64 addr, kvm_pte_t pte, u32 level, enum kvm_pgtable_prot prot)
{
int ret = 0;
/* XXX: optimize ... */
if (kvm_pte_valid(pte) && (level == KVM_PGTABLE_MAX_LEVELS - 1))
ret = kvm_pgtable_stage2_unmap(&host_mmu.pgt, addr, PAGE_SIZE);
if (!ret)
ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot, false);
return ret;
}
#define MODULE_PROT_ALLOWLIST (KVM_PGTABLE_PROT_RWX | \
KVM_PGTABLE_PROT_DEVICE |\
KVM_PGTABLE_PROT_NC | \
KVM_PGTABLE_PROT_PXN | \
KVM_PGTABLE_PROT_UXN)
int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot)
int module_change_host_page_prot_range(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages)
{
u64 addr = hyp_pfn_to_phys(pfn);
u64 i, addr = hyp_pfn_to_phys(pfn);
u64 end = addr + nr_pages * PAGE_SIZE;
struct hyp_page *page = NULL;
kvm_pte_t pte;
u32 level;
struct kvm_mem_range range;
bool is_mmio;
int ret;
if ((prot & MODULE_PROT_ALLOWLIST) != prot)
return -EINVAL;
is_mmio = !find_mem_range(addr, &range);
if (end > range.end) {
/* Specified range not in a single mmio or memory block. */
return -EPERM;
}
host_lock_component();
ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
if (ret)
goto unlock;
/*
* There is no hyp_vmemmap covering MMIO regions, which makes tracking
* of module-owned MMIO regions hard, so we trust the modules not to
* mess things up.
*/
if (!addr_is_memory(addr))
if (is_mmio)
goto update;
ret = -EPERM;
/* Range is memory: we can track module ownership. */
page = hyp_phys_to_page(addr);
/*
* Modules can only relax permissions of pages they own, and restrict
* permissions of pristine pages.
* Modules can only modify pages they already own, and pristine host
* pages. The entire range must be consistently one or the other.
*/
if (prot == KVM_PGTABLE_PROT_RWX) {
if (!(page->flags & MODULE_OWNED_PAGE))
if (page->flags & MODULE_OWNED_PAGE) {
/* The entire range must be module-owned. */
ret = -EPERM;
for (i = 1; i < nr_pages; i++) {
if (!(page[i].flags & MODULE_OWNED_PAGE))
goto unlock;
} else if (host_get_page_state(pte, addr) != PKVM_PAGE_OWNED) {
}
} else {
/* The entire range must be pristine. */
ret = __host_check_page_state_range(
addr, nr_pages << PAGE_SHIFT, PKVM_PAGE_OWNED);
if (ret)
goto unlock;
}
update:
if (prot == default_host_prot(!!page))
ret = host_stage2_set_owner_locked(addr, PAGE_SIZE, PKVM_ID_HOST);
else if (!prot)
ret = host_stage2_set_owner_locked(addr, PAGE_SIZE, PKVM_ID_PROTECTED);
else
ret = restrict_host_page_perms(addr, pte, level, prot);
if (!prot) {
ret = host_stage2_set_owner_locked(
addr, nr_pages << PAGE_SHIFT, PKVM_ID_PROTECTED);
} else {
ret = host_stage2_idmap_locked(
addr, nr_pages << PAGE_SHIFT, prot, false);
}
if (ret || !page)
if (WARN_ON(ret) || !page)
goto unlock;
for (i = 0; i < nr_pages; i++) {
if (prot != KVM_PGTABLE_PROT_RWX)
hyp_phys_to_page(addr)->flags |= MODULE_OWNED_PAGE;
page[i].flags |= MODULE_OWNED_PAGE;
else
hyp_phys_to_page(addr)->flags &= ~MODULE_OWNED_PAGE;
page[i].flags &= ~MODULE_OWNED_PAGE;
}
unlock:
host_unlock_component();
@ -2086,6 +2091,11 @@ int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot)
return ret;
}
int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot)
{
return module_change_host_page_prot_range(pfn, prot, 1);
}
int hyp_pin_shared_mem(void *from, void *to)
{
u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);


@ -115,6 +115,7 @@ const struct pkvm_module_ops module_ops = {
.hyp_pa = hyp_virt_to_phys,
.hyp_va = hyp_phys_to_virt,
.kern_hyp_va = __kern_hyp_va,
.host_stage2_mod_prot_range = module_change_host_page_prot_range,
};
int __pkvm_init_module(void *module_init)


@ -645,8 +645,13 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
return prot;
}
static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
static bool stage2_pte_needs_update(struct kvm_pgtable *pgt,
kvm_pte_t old, kvm_pte_t new)
{
/* Following filter logic applies only to guest stage-2 entries. */
if (pgt->flags & KVM_PGTABLE_S2_IDMAP)
return true;
if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
return true;
@ -715,12 +720,15 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
new = data->annotation;
/*
* Skip updating the PTE if we are trying to recreate the exact
* same mapping or only change the access permissions. Instead,
* the vCPU will exit one more time from guest if still needed
* and then go through the path of relaxing permissions.
* Skip updating a guest PTE if we are trying to recreate the exact
* same mapping or change only the access permissions. Instead,
* the vCPU will exit one more time from the guest if still needed
* and then go through the path of relaxing permissions. This applies
* only to guest PTEs; Host PTEs are unconditionally updated. The
* host cannot livelock because the abort handler has done prior
* checks before calling here.
*/
if (!stage2_pte_needs_update(old, new))
if (!stage2_pte_needs_update(pgt, old, new))
return -EAGAIN;
if (pte_ops->pte_is_counted_cb(old, level))
@ -775,6 +783,30 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
return 0;
}
static void stage2_map_prefault_idmap(struct kvm_pgtable_pte_ops *pte_ops,
u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, kvm_pte_t block_pte)
{
u64 pa, granule;
int i;
WARN_ON(pte_ops->pte_is_counted_cb(block_pte, level-1));
if (!kvm_pte_valid(block_pte))
return;
pa = ALIGN_DOWN(addr, kvm_granule_size(level-1));
granule = kvm_granule_size(level);
for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep, pa += granule) {
kvm_pte_t pte = kvm_init_valid_leaf_pte(pa, block_pte, level);
/* Skip ptes in the range being modified by the caller. */
if ((pa < addr) || (pa >= end)) {
/* We can write non-atomically: ptep isn't yet live. */
*ptep = pte;
}
}
}
static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct stage2_map_data *data)
{
@ -805,6 +837,11 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
if (!childp)
return -ENOMEM;
if (pgt->flags & KVM_PGTABLE_S2_IDMAP) {
stage2_map_prefault_idmap(pte_ops, addr, end, level + 1,
childp, pte);
}
/*
* If we've run into an existing block mapping then replace it with
* a table. Accesses beyond 'end' that fall within the new table


@ -619,6 +619,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault & VM_FAULT_MAJOR)
mm_flags |= FAULT_FLAG_TRIED;
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {


@ -496,6 +496,8 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault & VM_FAULT_MAJOR)
flags |= FAULT_FLAG_TRIED;
if (fault_signal_pending(fault, regs))
return user_mode(regs) ? 0 : SIGBUS;


@ -310,6 +310,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault & VM_FAULT_MAJOR)
flags |= FAULT_FLAG_TRIED;
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))


@ -420,6 +420,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
goto out;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault & VM_FAULT_MAJOR)
flags |= FAULT_FLAG_TRIED;
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
fault = VM_FAULT_SIGNAL;


@ -103,6 +103,16 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
return ES_OK;
}
static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
{
return ES_OK;
}
static bool fault_in_kernel_space(unsigned long address)
{
return false;
}
#undef __init
#undef __pa
#define __init


@ -118,6 +118,9 @@ CONFIG_ANON_VMA_NAME=y
CONFIG_USERFAULTFD=y
CONFIG_LRU_GEN=y
CONFIG_LRU_GEN_ENABLED=y
CONFIG_DAMON=y
CONFIG_DAMON_VADDR=y
CONFIG_DAMON_SYSFS=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@ -393,6 +396,7 @@ CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=100
CONFIG_THERMAL_WRITABLE_TRIPS=y
CONFIG_THERMAL_GOV_USER_SPACE=y
CONFIG_CPU_THERMAL=y
CONFIG_CPU_IDLE_THERMAL=y
CONFIG_DEVFREQ_THERMAL=y
CONFIG_THERMAL_EMULATION=y
# CONFIG_X86_PKG_TEMP_THERMAL is not set
@ -520,6 +524,7 @@ CONFIG_IIO=y
CONFIG_IIO_BUFFER=y
CONFIG_IIO_TRIGGER=y
CONFIG_POWERCAP=y
CONFIG_IDLE_INJECT=y
CONFIG_ANDROID_BINDER_IPC=y
CONFIG_ANDROID_BINDERFS=y
CONFIG_ANDROID_DEBUG_SYMBOLS=y


@ -629,6 +629,23 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
unsigned long address,
bool write)
{
if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
ctxt->fi.vector = X86_TRAP_PF;
ctxt->fi.error_code = X86_PF_USER;
ctxt->fi.cr2 = address;
if (write)
ctxt->fi.error_code |= X86_PF_WRITE;
return ES_EXCEPTION;
}
return ES_OK;
}
static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
void *src, char *buf,
unsigned int data_size,
@ -636,7 +653,12 @@ static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
bool backwards)
{
int i, b = backwards ? -1 : 1;
enum es_result ret = ES_OK;
unsigned long address = (unsigned long)src;
enum es_result ret;
ret = vc_insn_string_check(ctxt, address, false);
if (ret != ES_OK)
return ret;
for (i = 0; i < count; i++) {
void *s = src + (i * data_size * b);
@ -657,7 +679,12 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
bool backwards)
{
int i, s = backwards ? -1 : 1;
enum es_result ret = ES_OK;
unsigned long address = (unsigned long)dst;
enum es_result ret;
ret = vc_insn_string_check(ctxt, address, true);
if (ret != ES_OK)
return ret;
for (i = 0; i < count; i++) {
void *d = dst + (i * data_size * s);
@ -693,6 +720,9 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
{
struct insn *insn = &ctxt->insn;
size_t size;
u64 port;
*exitinfo = 0;
switch (insn->opcode.bytes[0]) {
@ -701,7 +731,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0x6d:
*exitinfo |= IOIO_TYPE_INS;
*exitinfo |= IOIO_SEG_ES;
*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
port = ctxt->regs->dx & 0xffff;
break;
/* OUTS opcodes */
@ -709,41 +739,43 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0x6f:
*exitinfo |= IOIO_TYPE_OUTS;
*exitinfo |= IOIO_SEG_DS;
*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
port = ctxt->regs->dx & 0xffff;
break;
/* IN immediate opcodes */
case 0xe4:
case 0xe5:
*exitinfo |= IOIO_TYPE_IN;
*exitinfo |= (u8)insn->immediate.value << 16;
port = (u8)insn->immediate.value & 0xffff;
break;
/* OUT immediate opcodes */
case 0xe6:
case 0xe7:
*exitinfo |= IOIO_TYPE_OUT;
*exitinfo |= (u8)insn->immediate.value << 16;
port = (u8)insn->immediate.value & 0xffff;
break;
/* IN register opcodes */
case 0xec:
case 0xed:
*exitinfo |= IOIO_TYPE_IN;
*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
port = ctxt->regs->dx & 0xffff;
break;
/* OUT register opcodes */
case 0xee:
case 0xef:
*exitinfo |= IOIO_TYPE_OUT;
*exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
port = ctxt->regs->dx & 0xffff;
break;
default:
return ES_DECODE_FAILED;
}
*exitinfo |= port << 16;
switch (insn->opcode.bytes[0]) {
case 0x6c:
case 0x6e:
@ -753,12 +785,15 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0xee:
/* Single byte opcodes */
*exitinfo |= IOIO_DATA_8;
size = 1;
break;
default:
/* Length determined by instruction parsing */
*exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
: IOIO_DATA_32;
size = (insn->opnd_bytes == 2) ? 2 : 4;
}
switch (insn->addr_bytes) {
case 2:
*exitinfo |= IOIO_ADDR_16;
@ -774,7 +809,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
if (insn_has_rep_prefix(insn))
*exitinfo |= IOIO_REP;
return ES_OK;
return vc_ioio_check(ctxt, (u16)port, size);
}
static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)


@ -512,6 +512,33 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
return ES_OK;
}
static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
{
BUG_ON(size > 4);
if (user_mode(ctxt->regs)) {
struct thread_struct *t = &current->thread;
struct io_bitmap *iobm = t->io_bitmap;
size_t idx;
if (!iobm)
goto fault;
for (idx = port; idx < port + size; ++idx) {
if (test_bit(idx, iobm->bitmap))
goto fault;
}
}
return ES_OK;
fault:
ctxt->fi.vector = X86_TRAP_GP;
ctxt->fi.error_code = 0;
return ES_EXCEPTION;
}
/* Include code shared with pre-decompression boot stage */
#include "sev-shared.c"


@ -1369,6 +1369,8 @@ void do_user_addr_fault(struct pt_regs *regs,
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault & VM_FAULT_MAJOR)
flags |= FAULT_FLAG_TRIED;
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {


@ -2553,3 +2553,49 @@ void bpf_jit_free(struct bpf_prog *prog)
bpf_prog_unlock_free(prog);
}
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct bpf_prog *new, struct bpf_prog *old)
{
u8 *old_addr, *new_addr, *old_bypass_addr;
int ret;
old_bypass_addr = old ? NULL : poke->bypass_addr;
old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
/*
* On program loading or teardown, the program's kallsym entry
* might not be in place, so we use __bpf_arch_text_poke to skip
* the kallsyms check.
*/
if (new) {
ret = __bpf_arch_text_poke(poke->tailcall_target,
BPF_MOD_JUMP,
old_addr, new_addr);
BUG_ON(ret < 0);
if (!old) {
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
BPF_MOD_JUMP,
poke->bypass_addr,
NULL);
BUG_ON(ret < 0);
}
} else {
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
BPF_MOD_JUMP,
old_bypass_addr,
poke->bypass_addr);
BUG_ON(ret < 0);
/* let other CPUs finish the execution of the program
* so that it will not be possible to expose them
* to an invalid nop, stack unwind, nop state
*/
if (!ret)
synchronize_rcu();
ret = __bpf_arch_text_poke(poke->tailcall_target,
BPF_MOD_JUMP,
old_addr, NULL);
BUG_ON(ret < 0);
}
}


@ -2127,24 +2127,23 @@ static void binder_deferred_fd_close(int fd)
static void binder_transaction_buffer_release(struct binder_proc *proc,
struct binder_thread *thread,
struct binder_buffer *buffer,
binder_size_t failed_at,
binder_size_t off_end_offset,
bool is_failure)
{
int debug_id = buffer->debug_id;
binder_size_t off_start_offset, buffer_offset, off_end_offset;
binder_size_t off_start_offset, buffer_offset;
binder_debug(BINDER_DEBUG_TRANSACTION,
"%d buffer release %d, size %zd-%zd, failed at %llx\n",
proc->pid, buffer->debug_id,
buffer->data_size, buffer->offsets_size,
(unsigned long long)failed_at);
(unsigned long long)off_end_offset);
if (buffer->target_node)
binder_dec_node(buffer->target_node, 1, 0);
off_start_offset = ALIGN(buffer->data_size, sizeof(void *));
off_end_offset = is_failure && failed_at ? failed_at :
off_start_offset + buffer->offsets_size;
for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
buffer_offset += sizeof(binder_size_t)) {
struct binder_object_header *hdr;
@ -2304,6 +2303,21 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
}
}
/* Clean up all the objects in the buffer */
static inline void binder_release_entire_buffer(struct binder_proc *proc,
struct binder_thread *thread,
struct binder_buffer *buffer,
bool is_failure)
{
binder_size_t off_end_offset;
off_end_offset = ALIGN(buffer->data_size, sizeof(void *));
off_end_offset += buffer->offsets_size;
binder_transaction_buffer_release(proc, thread, buffer,
off_end_offset, is_failure);
}
static int binder_translate_binder(struct flat_binder_object *fp,
struct binder_transaction *t,
struct binder_thread *thread)
@ -3013,7 +3027,7 @@ static int binder_proc_transaction(struct binder_transaction *t,
t_outdated->buffer = NULL;
buffer->transaction = NULL;
trace_binder_transaction_update_buffer_release(buffer);
binder_transaction_buffer_release(proc, NULL, buffer, 0, 0);
binder_release_entire_buffer(proc, NULL, buffer, false);
binder_alloc_free_buf(&proc->alloc, buffer);
kfree(t_outdated);
binder_stats_deleted(BINDER_STAT_TRANSACTION);
@ -4004,7 +4018,7 @@ binder_free_buf(struct binder_proc *proc,
binder_node_inner_unlock(buf_node);
}
trace_binder_transaction_buffer_release(buffer);
binder_transaction_buffer_release(proc, thread, buffer, 0, is_failure);
binder_release_entire_buffer(proc, thread, buffer, is_failure);
binder_alloc_free_buf(&proc->alloc, buffer);
}


@ -26,6 +26,7 @@
#include <trace/hooks/printk.h>
#include <trace/hooks/epoch.h>
#include <trace/hooks/cpufreq.h>
#include <trace/hooks/fs.h>
#include <trace/hooks/preemptirq.h>
#include <trace/hooks/ftrace_dump.h>
#include <trace/hooks/ufshcd.h>
@ -94,6 +95,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_task_blocks_on_rtmutex);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rtmutex_waiter_prio);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rtmutex_wait_start);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rtmutex_wait_finish);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_rt_mutex_steal);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_opt_spin_start);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_opt_spin_finish);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mutex_can_spin_on_owner);
@ -310,6 +312,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_thermal_unregister);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_get_thermal_zone_device);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_thermal_power_cap);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_enable_thermal_power_throttle);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_read_pages);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_pages_reclaim_bypass);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_pages_failure_bypass);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_madvise_pageout_swap_entry);
@ -364,3 +367,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mmc_blk_mq_rw_recovery);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sd_update_bus_speed_mode);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_slab_folio_alloced);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_kmalloc_large_alloced);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_netlink_poll);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ep_create_wakeup_source);
EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_timerfd_create);


@ -461,10 +461,14 @@ static void devfreq_monitor(struct work_struct *work)
if (err)
dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err);
if (devfreq->stop_polling)
goto out;
queue_delayed_work(devfreq_wq, &devfreq->work,
msecs_to_jiffies(devfreq->profile->polling_ms));
mutex_unlock(&devfreq->lock);
out:
mutex_unlock(&devfreq->lock);
trace_devfreq_monitor(devfreq);
}
@ -482,6 +486,10 @@ void devfreq_monitor_start(struct devfreq *devfreq)
if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN))
return;
mutex_lock(&devfreq->lock);
if (delayed_work_pending(&devfreq->work))
goto out;
switch (devfreq->profile->timer) {
case DEVFREQ_TIMER_DEFERRABLE:
INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
@ -490,12 +498,16 @@ void devfreq_monitor_start(struct devfreq *devfreq)
INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
break;
default:
return;
goto out;
}
if (devfreq->profile->polling_ms)
queue_delayed_work(devfreq_wq, &devfreq->work,
msecs_to_jiffies(devfreq->profile->polling_ms));
out:
devfreq->stop_polling = false;
mutex_unlock(&devfreq->lock);
}
EXPORT_SYMBOL(devfreq_monitor_start);
@ -512,6 +524,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq)
if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN))
return;
mutex_lock(&devfreq->lock);
if (devfreq->stop_polling) {
mutex_unlock(&devfreq->lock);
return;
}
devfreq->stop_polling = true;
mutex_unlock(&devfreq->lock);
cancel_delayed_work_sync(&devfreq->work);
}
EXPORT_SYMBOL(devfreq_monitor_stop);


@ -11,6 +11,7 @@
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include "dma-buf-sysfs-stats.h"
@ -168,35 +169,76 @@ void dma_buf_uninit_sysfs_statistics(void)
kset_unregister(dma_buf_stats_kset);
}
struct dma_buf_create_sysfs_entry {
struct dma_buf *dmabuf;
struct work_struct work;
};
union dma_buf_create_sysfs_work_entry {
struct dma_buf_create_sysfs_entry create_entry;
struct dma_buf_sysfs_entry sysfs_entry;
};
static void sysfs_add_workfn(struct work_struct *work)
{
struct dma_buf_create_sysfs_entry *create_entry =
container_of(work, struct dma_buf_create_sysfs_entry, work);
struct dma_buf *dmabuf = create_entry->dmabuf;
/*
* A dmabuf is ref-counted via its file member. If this handler holds the only
* reference to the dmabuf, there is no need to create the sysfs kobject. This is an
* optimization and a race: if the reference count drops to 1 immediately after
* this check, that is harmless, because the sysfs entry will still be cleaned up in
* dma_buf_stats_teardown(), which is not called until the final dmabuf reference
* is released, and that cannot happen until the end of this function.
*/
if (file_count(dmabuf->file) > 1) {
dmabuf->sysfs_entry->dmabuf = dmabuf;
/*
* kobject_init_and_add expects kobject to be zero-filled, but we have populated it
* (the sysfs_add_work union member) to trigger this work function.
*/
memset(&dmabuf->sysfs_entry->kobj, 0, sizeof(dmabuf->sysfs_entry->kobj));
dmabuf->sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset;
if (kobject_init_and_add(&dmabuf->sysfs_entry->kobj, &dma_buf_ktype, NULL,
"%lu", file_inode(dmabuf->file)->i_ino)) {
kobject_put(&dmabuf->sysfs_entry->kobj);
dmabuf->sysfs_entry = NULL;
}
} else {
/*
* Free the sysfs_entry and reset the pointer so dma_buf_stats_teardown doesn't
* attempt to operate on it.
*/
kfree(dmabuf->sysfs_entry);
dmabuf->sysfs_entry = NULL;
}
dma_buf_put(dmabuf);
}
int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file)
{
struct dma_buf_sysfs_entry *sysfs_entry;
int ret;
struct dma_buf_create_sysfs_entry *create_entry;
union dma_buf_create_sysfs_work_entry *work_entry;
if (!dmabuf->exp_name) {
pr_err("exporter name must not be empty if stats needed\n");
return -EINVAL;
}
sysfs_entry = kzalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL);
if (!sysfs_entry)
work_entry = kmalloc(sizeof(union dma_buf_create_sysfs_work_entry), GFP_KERNEL);
if (!work_entry)
return -ENOMEM;
sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset;
sysfs_entry->dmabuf = dmabuf;
dmabuf->sysfs_entry = &work_entry->sysfs_entry;
dmabuf->sysfs_entry = sysfs_entry;
create_entry = &work_entry->create_entry;
create_entry->dmabuf = dmabuf;
/* create the directory for buffer stats */
ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL,
"%lu", file_inode(file)->i_ino);
if (ret)
goto err_sysfs_dmabuf;
INIT_WORK(&create_entry->work, sysfs_add_workfn);
get_dma_buf(dmabuf); /* This reference will be dropped in sysfs_add_workfn. */
schedule_work(&create_entry->work);
return 0;
err_sysfs_dmabuf:
kobject_put(&sysfs_entry->kobj);
dmabuf->sysfs_entry = NULL;
return ret;
}


@ -727,10 +727,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
dmabuf->resv = resv;
}
ret = dma_buf_stats_setup(dmabuf, file);
if (ret)
goto err_dmabuf;
file->private_data = dmabuf;
file->f_path.dentry->d_fsdata = dmabuf;
dmabuf->file = file;
@ -739,9 +735,19 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
list_add(&dmabuf->list_node, &db_list.head);
mutex_unlock(&db_list.lock);
ret = dma_buf_stats_setup(dmabuf, file);
if (ret)
goto err_sysfs;
return dmabuf;
err_dmabuf:
err_sysfs:
mutex_lock(&db_list.lock);
list_del(&dmabuf->list_node);
mutex_unlock(&db_list.lock);
dmabuf->file = NULL;
file->f_path.dentry->d_fsdata = NULL;
file->private_data = NULL;
if (!resv)
dma_resv_fini(dmabuf->resv);
kfree(dmabuf);


@ -656,7 +656,9 @@ static void verity_end_io(struct bio *bio)
struct dm_verity_io *io = bio->bi_private;
if (bio->bi_status &&
(!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) {
(!verity_fec_is_enabled(io->v) ||
verity_is_system_shutting_down() ||
(bio->bi_opf & REQ_RAHEAD))) {
verity_finish_io(io, bio->bi_status);
return;
}


@ -345,6 +345,7 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
{
queue->rcv_state = NVMET_TCP_RECV_ERR;
if (status == -EPIPE || status == -ECONNRESET)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
else
@ -871,15 +872,11 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
iov.iov_len = sizeof(*icresp);
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (ret < 0)
goto free_crypto;
return ret; /* queue removal will cleanup */
queue->state = NVMET_TCP_Q_LIVE;
nvmet_prepare_receive_pdu(queue);
return 0;
free_crypto:
if (queue->hdr_digest || queue->data_digest)
nvmet_tcp_free_crypto(queue);
return ret;
}
static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,


@ -50,13 +50,55 @@ config PHY_QCOM_PCIE2
Enable this to support the Qualcomm PCIe PHY, used with the Synopsys
based PCIe controller.
config PHY_QCOM_QMP
tristate "Qualcomm QMP PHY Driver"
menuconfig PHY_QCOM_QMP
tristate "Qualcomm QMP PHY Drivers"
depends on OF && COMMON_CLK && (ARCH_QCOM || COMPILE_TEST)
if PHY_QCOM_QMP
config PHY_QCOM_QMP_COMBO
tristate "Qualcomm QMP Combo PHY Driver"
default PHY_QCOM_QMP
select GENERIC_PHY
help
Enable this to support the QMP PHY transceiver that is used
with controllers such as PCIe, UFS, and USB on Qualcomm chips.
Enable this to support the QMP Combo PHY transceiver that is used
with USB3 and DisplayPort controllers on Qualcomm chips.
config PHY_QCOM_QMP_PCIE
tristate "Qualcomm QMP PCIe PHY Driver"
depends on PCI || COMPILE_TEST
select GENERIC_PHY
default PHY_QCOM_QMP
help
Enable this to support the QMP PCIe PHY transceiver that is used
with PCIe controllers on Qualcomm chips.
config PHY_QCOM_QMP_PCIE_8996
tristate "Qualcomm QMP PCIe 8996 PHY Driver"
depends on PCI || COMPILE_TEST
select GENERIC_PHY
default PHY_QCOM_QMP
help
Enable this to support the QMP PCIe PHY transceiver that is used
with PCIe controllers on Qualcomm msm8996 chips.
config PHY_QCOM_QMP_UFS
tristate "Qualcomm QMP UFS PHY Driver"
select GENERIC_PHY
default PHY_QCOM_QMP
help
Enable this to support the QMP UFS PHY transceiver that is used
with UFS controllers on Qualcomm chips.
config PHY_QCOM_QMP_USB
tristate "Qualcomm QMP USB PHY Driver"
select GENERIC_PHY
default PHY_QCOM_QMP
help
Enable this to support the QMP USB PHY transceiver that is used
with USB3 controllers on Qualcomm chips.
endif # PHY_QCOM_QMP
config PHY_QCOM_QUSB2
tristate "Qualcomm QUSB2 PHY Driver"


@ -5,12 +5,12 @@ obj-$(CONFIG_PHY_QCOM_EDP) += phy-qcom-edp.o
obj-$(CONFIG_PHY_QCOM_IPQ4019_USB) += phy-qcom-ipq4019-usb.o
obj-$(CONFIG_PHY_QCOM_IPQ806X_SATA) += phy-qcom-ipq806x-sata.o
obj-$(CONFIG_PHY_QCOM_PCIE2) += phy-qcom-pcie2.o
obj-$(CONFIG_PHY_QCOM_QMP) += \
phy-qcom-qmp-combo.o \
phy-qcom-qmp-pcie.o \
phy-qcom-qmp-pcie-msm8996.o \
phy-qcom-qmp-ufs.o \
phy-qcom-qmp-usb.o
obj-$(CONFIG_PHY_QCOM_QMP_COMBO) += phy-qcom-qmp-combo.o
obj-$(CONFIG_PHY_QCOM_QMP_PCIE) += phy-qcom-qmp-pcie.o
obj-$(CONFIG_PHY_QCOM_QMP_PCIE_8996) += phy-qcom-qmp-pcie-msm8996.o
obj-$(CONFIG_PHY_QCOM_QMP_UFS) += phy-qcom-qmp-ufs.o
obj-$(CONFIG_PHY_QCOM_QMP_USB) += phy-qcom-qmp-usb.o
obj-$(CONFIG_PHY_QCOM_QUSB2) += phy-qcom-qusb2.o
obj-$(CONFIG_PHY_QCOM_USB_HS) += phy-qcom-usb-hs.o


@ -8683,7 +8683,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba)
ufs_bsg_probe(hba);
ufshpb_init(hba);
scsi_scan_host(hba->host);
pm_runtime_put_sync(hba->dev);
out:
return ret;
@ -8916,15 +8915,12 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
/* Probe and add UFS logical units */
ret = ufshcd_add_lus(hba);
out:
/*
* If we failed to initialize the device or the device is not
* present, turn off the power/clocks etc.
*/
if (ret) {
pm_runtime_put_sync(hba->dev);
ufshcd_hba_exit(hba);
}
if (ret)
dev_err(hba->dev, "%s failed: %d\n", __func__, ret);
}
static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd)


@ -1233,6 +1233,9 @@ static int dwc3_core_init(struct dwc3 *dwc)
if (dwc->parkmode_disable_ss_quirk)
reg |= DWC3_GUCTL1_PARKMODE_DISABLE_SS;
if (dwc->parkmode_disable_hs_quirk)
reg |= DWC3_GUCTL1_PARKMODE_DISABLE_HS;
if (DWC3_VER_IS_WITHIN(DWC3, 290A, ANY) &&
(dwc->maximum_speed == USB_SPEED_HIGH ||
dwc->maximum_speed == USB_SPEED_FULL))
@ -1539,6 +1542,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
"snps,resume-hs-terminations");
dwc->parkmode_disable_ss_quirk = device_property_read_bool(dev,
"snps,parkmode-disable-ss-quirk");
dwc->parkmode_disable_hs_quirk = device_property_read_bool(dev,
"snps,parkmode-disable-hs-quirk");
dwc->gfladj_refclk_lpm_sel = device_property_read_bool(dev,
"snps,gfladj-refclk-lpm-sel-quirk");


@ -263,6 +263,7 @@
#define DWC3_GUCTL1_DEV_FORCE_20_CLK_FOR_30_CLK BIT(26)
#define DWC3_GUCTL1_DEV_L1_EXIT_BY_HW BIT(24)
#define DWC3_GUCTL1_PARKMODE_DISABLE_SS BIT(17)
#define DWC3_GUCTL1_PARKMODE_DISABLE_HS BIT(16)
#define DWC3_GUCTL1_RESUME_OPMODE_HS_HOST BIT(10)
/* Global Status Register */
@ -1113,6 +1114,8 @@ struct dwc3_scratchpad_array {
* generation after resume from suspend.
* @parkmode_disable_ss_quirk: set if we need to disable all SuperSpeed
* instances in park mode.
* @parkmode_disable_hs_quirk: set if we need to disable all HighSpeed
* instances in park mode.
* @tx_de_emphasis_quirk: set if we enable Tx de-emphasis quirk
* @tx_de_emphasis: Tx de-emphasis value
* 0 - -6dB de-emphasis
@ -1330,6 +1333,7 @@ struct dwc3 {
unsigned dis_tx_ipgap_linecheck_quirk:1;
unsigned resume_hs_terminations:1;
unsigned parkmode_disable_ss_quirk:1;
unsigned parkmode_disable_hs_quirk:1;
unsigned gfladj_refclk_lpm_sel:1;
unsigned tx_de_emphasis_quirk:1;


@ -2093,7 +2093,17 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
list_for_each_entry(r, &dep->pending_list, list) {
if (r == req) {
/*
* Explicitly check for EP0/1, as dequeue for those
* EPs needs to be handled differently. The control EP
* only deals with one USB req, and giveback will
* occur during dwc3_ep0_stall_and_restart(). EP0
* requests are never added to started_list.
*/
if (dep->number > 1)
dwc3_gadget_giveback(dep, req, -ECONNRESET);
else
dwc3_ep0_reset_state(dwc);
goto out;
}
}


@ -1619,8 +1619,6 @@ static void gadget_unbind_driver(struct device *dev)
dev_dbg(&udc->dev, "unbinding gadget driver [%s]\n", driver->function);
kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
udc->allow_connect = false;
cancel_work_sync(&udc->vbus_work);
mutex_lock(&udc->connect_lock);
@ -1640,6 +1638,8 @@ static void gadget_unbind_driver(struct device *dev)
driver->is_bound = false;
udc->driver = NULL;
mutex_unlock(&udc_lock);
kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
}
/* ------------------------------------------------------------------------- */


@ -1053,20 +1053,20 @@ static void xhci_get_usb3_port_status(struct xhci_port *port, u32 *status,
*status |= USB_PORT_STAT_C_CONFIG_ERROR << 16;
/* USB3 specific wPortStatus bits */
if (portsc & PORT_POWER) {
if (portsc & PORT_POWER)
*status |= USB_SS_PORT_STAT_POWER;
/* link state handling */
if (link_state == XDEV_U0)
bus_state->suspended_ports &= ~(1 << portnum);
}
/* remote wake resume signaling complete */
if (bus_state->port_remote_wakeup & (1 << portnum) &&
/* no longer suspended or resuming */
if (link_state != XDEV_U3 &&
link_state != XDEV_RESUME &&
link_state != XDEV_RECOVERY) {
/* remote wake resume signaling complete */
if (bus_state->port_remote_wakeup & (1 << portnum)) {
bus_state->port_remote_wakeup &= ~(1 << portnum);
usb_hcd_end_port_resume(&hcd->self, portnum);
}
bus_state->suspended_ports &= ~(1 << portnum);
}
xhci_hub_report_usb3_link_state(xhci, status, portsc);
xhci_del_comp_mod_timer(xhci, portsc, portnum);
@ -1111,6 +1111,21 @@ static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status,
return;
}
}
/*
* Clear usb2 resume signalling variables if port is no longer suspended
* or resuming. Port either resumed to U0/U1/U2, disconnected, or is in an
* error state. Resume-related variables should be cleared in all those cases.
*/
if (link_state != XDEV_U3 && link_state != XDEV_RESUME) {
if (bus_state->resume_done[portnum] ||
test_bit(portnum, &bus_state->resuming_ports)) {
bus_state->resume_done[portnum] = 0;
clear_bit(portnum, &bus_state->resuming_ports);
usb_hcd_end_port_resume(&port->rhub->hcd->self, portnum);
}
bus_state->suspended_ports &= ~(1 << portnum);
}
}
/*


@ -2855,7 +2855,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
PD_MSG_CTRL_NOT_SUPP,
NONE_AMS);
} else {
if (port->send_discover) {
if (port->send_discover && port->negotiated_rev < PD_REV30) {
tcpm_queue_message(port, PD_MSG_CTRL_WAIT);
break;
}
@ -2871,7 +2871,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
PD_MSG_CTRL_NOT_SUPP,
NONE_AMS);
} else {
if (port->send_discover) {
if (port->send_discover && port->negotiated_rev < PD_REV30) {
tcpm_queue_message(port, PD_MSG_CTRL_WAIT);
break;
}
@ -2880,7 +2880,7 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
}
break;
case PD_CTRL_VCONN_SWAP:
if (port->send_discover) {
if (port->send_discover && port->negotiated_rev < PD_REV30) {
tcpm_queue_message(port, PD_MSG_CTRL_WAIT);
break;
}


@ -196,6 +196,7 @@ static int gh_vcpu_run(struct gh_vcpu *vcpu)
}
gh_error = gh_hypercall_vcpu_run(vcpu->rsc->capid, state_data, &vcpu_run_resp);
memset(state_data, 0, sizeof(state_data));
if (gh_error == GH_ERROR_OK) {
switch (vcpu_run_resp.state) {
case GH_VCPU_STATE_READY:


@ -89,8 +89,7 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);
extern const struct z_erofs_decompressor erofs_decompressors[];
/* prototypes for specific algorithms */
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,

View File

@ -404,6 +404,8 @@ const struct address_space_operations erofs_raw_access_aops = {
.readahead = erofs_readahead,
.bmap = erofs_bmap,
.direct_IO = noop_direct_IO,
.release_folio = iomap_release_folio,
.invalidate_folio = iomap_invalidate_folio,
};
#ifdef CONFIG_FS_DAX

View File

@ -122,11 +122,11 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
}
static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
void *inpage, unsigned int *inputmargin, int *maptype,
bool may_inplace)
void *inpage, void *out, unsigned int *inputmargin,
int *maptype, bool may_inplace)
{
struct z_erofs_decompress_req *rq = ctx->rq;
unsigned int omargin, total, i, j;
unsigned int omargin, total, i;
struct page **in;
void *src, *tmp;
@ -136,20 +136,20 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy;
for (i = 0; i < ctx->inpages; ++i) {
DBG_BUGON(rq->in[i] == NULL);
for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j)
if (rq->out[j] == rq->in[i])
for (i = 0; i < ctx->inpages; ++i)
if (rq->out[ctx->outpages - ctx->inpages + i] !=
rq->in[i])
goto docopy;
}
kunmap_local(inpage);
*maptype = 3;
return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT);
}
if (ctx->inpages <= 1) {
*maptype = 0;
return inpage;
}
kunmap_atomic(inpage);
might_sleep();
kunmap_local(inpage);
src = erofs_vm_map_ram(rq->in, ctx->inpages);
if (!src)
return ERR_PTR(-ENOMEM);
@ -162,7 +162,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
src = erofs_get_pcpubuf(ctx->inpages);
if (!src) {
DBG_BUGON(1);
kunmap_atomic(inpage);
kunmap_local(inpage);
return ERR_PTR(-EFAULT);
}
@ -173,9 +173,9 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
min_t(unsigned int, total, PAGE_SIZE - *inputmargin);
if (!inpage)
inpage = kmap_atomic(*in);
inpage = kmap_local_page(*in);
memcpy(tmp, inpage + *inputmargin, page_copycnt);
kunmap_atomic(inpage);
kunmap_local(inpage);
inpage = NULL;
tmp += page_copycnt;
total -= page_copycnt;
@ -205,16 +205,16 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
}
static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
u8 *out)
u8 *dst)
{
struct z_erofs_decompress_req *rq = ctx->rq;
bool support_0padding = false, may_inplace = false;
unsigned int inputmargin;
u8 *headpage, *src;
u8 *out, *headpage, *src;
int ret, maptype;
DBG_BUGON(*rq->in == NULL);
headpage = kmap_atomic(*rq->in);
headpage = kmap_local_page(*rq->in);
/* LZ4 decompression inplace is only safe if zero_padding is enabled */
if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) {
@ -223,7 +223,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
min_t(unsigned int, rq->inputsize,
rq->sb->s_blocksize - rq->pageofs_in));
if (ret) {
kunmap_atomic(headpage);
kunmap_local(headpage);
return ret;
}
may_inplace = !((rq->pageofs_in + rq->inputsize) &
@ -231,11 +231,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
}
inputmargin = rq->pageofs_in;
src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin,
&maptype, may_inplace);
if (IS_ERR(src))
return PTR_ERR(src);
out = dst + rq->pageofs_out;
/* legacy format could compress extra data in a pcluster. */
if (rq->partial_decoding || !support_0padding)
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
@ -261,12 +262,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
}
if (maptype == 0) {
kunmap_atomic(headpage);
kunmap_local(headpage);
} else if (maptype == 1) {
vm_unmap_ram(src, ctx->inpages);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
} else {
} else if (maptype != 3) {
DBG_BUGON(1);
return -EFAULT;
}
@ -289,7 +290,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
/* one optimized fast path only for non bigpcluster cases yet */
if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) {
DBG_BUGON(!*rq->out);
dst = kmap_atomic(*rq->out);
dst = kmap_local_page(*rq->out);
dst_maptype = 0;
goto dstmap_out;
}
@ -309,9 +310,9 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
}
dstmap_out:
ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out);
ret = z_erofs_lz4_decompress_mem(&ctx, dst);
if (!dst_maptype)
kunmap_atomic(dst);
kunmap_local(dst);
else if (dst_maptype == 2)
vm_unmap_ram(dst, ctx.outpages);
return ret;
@ -320,50 +321,63 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
const unsigned int inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
const unsigned int outpages =
const unsigned int nrpages_in =
PAGE_ALIGN(rq->pageofs_in + rq->inputsize) >> PAGE_SHIFT;
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int righthalf = min_t(unsigned int, rq->outputsize,
PAGE_SIZE - rq->pageofs_out);
const unsigned int lefthalf = rq->outputsize - righthalf;
const unsigned int interlaced_offset =
rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out;
unsigned char *src, *dst;
const unsigned int bs = rq->sb->s_blocksize;
unsigned int cur = 0, ni = 0, no, pi, po, insz, cnt;
u8 *kin;
if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
DBG_BUGON(1);
return -EFSCORRUPTED;
DBG_BUGON(rq->outputsize > rq->inputsize);
if (rq->alg == Z_EROFS_COMPRESSION_INTERLACED) {
cur = bs - (rq->pageofs_out & (bs - 1));
pi = (rq->pageofs_in + rq->inputsize - cur) & ~PAGE_MASK;
cur = min(cur, rq->outputsize);
if (cur && rq->out[0]) {
kin = kmap_local_page(rq->in[nrpages_in - 1]);
if (rq->out[0] == rq->in[nrpages_in - 1]) {
memmove(kin + rq->pageofs_out, kin + pi, cur);
flush_dcache_page(rq->out[0]);
} else {
memcpy_to_page(rq->out[0], rq->pageofs_out,
kin + pi, cur);
}
kunmap_local(kin);
}
rq->outputsize -= cur;
}
if (rq->out[0] == *rq->in) {
DBG_BUGON(rq->pageofs_out);
return 0;
for (; rq->outputsize; rq->pageofs_in = 0, cur += PAGE_SIZE, ni++) {
insz = min_t(unsigned int, PAGE_SIZE - rq->pageofs_in,
rq->outputsize);
rq->outputsize -= insz;
if (!rq->in[ni])
continue;
kin = kmap_local_page(rq->in[ni]);
pi = 0;
do {
no = (rq->pageofs_out + cur + pi) >> PAGE_SHIFT;
po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK;
DBG_BUGON(no >= nrpages_out);
cnt = min_t(unsigned int, insz - pi, PAGE_SIZE - po);
if (rq->out[no] == rq->in[ni]) {
memmove(kin + po,
kin + rq->pageofs_in + pi, cnt);
flush_dcache_page(rq->out[no]);
} else if (rq->out[no]) {
memcpy_to_page(rq->out[no], po,
kin + rq->pageofs_in + pi, cnt);
}
src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in;
if (rq->out[0]) {
dst = kmap_local_page(rq->out[0]);
memcpy(dst + rq->pageofs_out, src + interlaced_offset,
righthalf);
kunmap_local(dst);
pi += cnt;
} while (pi < insz);
kunmap_local(kin);
}
if (outpages > inpages) {
DBG_BUGON(!rq->out[outpages - 1]);
if (rq->out[outpages - 1] != rq->in[inpages - 1]) {
dst = kmap_local_page(rq->out[outpages - 1]);
memcpy(dst, interlaced_offset ? src :
(src + righthalf), lefthalf);
kunmap_local(dst);
} else if (!interlaced_offset) {
memmove(src, src + righthalf, lefthalf);
}
}
kunmap_local(src);
DBG_BUGON(ni > nrpages_in);
return 0;
}
static struct z_erofs_decompressor decompressors[] = {
const struct z_erofs_decompressor erofs_decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
.decompress = z_erofs_transform_plain,
.name = "shifted"
@ -383,9 +397,3 @@ static struct z_erofs_decompressor decompressors[] = {
},
#endif
};
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
return decompressors[rq->alg].decompress(rq, pagepool);
}

View File

@ -291,14 +291,19 @@ static int erofs_fill_inode(struct inode *inode)
}
if (erofs_inode_is_data_compressed(vi->datalayout)) {
if (!erofs_is_fscache_mode(inode->i_sb) &&
inode->i_sb->s_blocksize_bits == PAGE_SHIFT)
if (!erofs_is_fscache_mode(inode->i_sb)) {
DO_ONCE_LITE_IF(inode->i_sb->s_blocksize != PAGE_SIZE,
erofs_info, inode->i_sb,
"EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!");
err = z_erofs_fill_inode(inode);
}
else
err = -EOPNOTSUPP;
goto out_unlock;
}
inode->i_mapping->a_ops = &erofs_raw_access_aops;
if (!erofs_is_fscache_mode(inode->i_sb))
mapping_set_large_folios(inode->i_mapping);
#ifdef CONFIG_EROFS_FS_ONDEMAND
if (erofs_is_fscache_mode(inode->i_sb))
inode->i_mapping->a_ops = &erofs_fscache_access_aops;

View File

@ -544,7 +544,7 @@ int __init z_erofs_init_zip_subsystem(void);
void z_erofs_exit_zip_subsystem(void);
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
struct erofs_workgroup *egrp);
int erofs_try_to_free_cached_page(struct page *page);
int erofs_init_managed_cache(struct super_block *sb);
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int len);
@ -565,6 +565,7 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
}
return 0;
}
static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif /* !CONFIG_EROFS_FS_ZIP */
#ifdef CONFIG_EROFS_FS_ZIP_LZMA

View File

@ -597,68 +597,6 @@ static int erofs_fc_parse_param(struct fs_context *fc,
return 0;
}
#ifdef CONFIG_EROFS_FS_ZIP
static const struct address_space_operations managed_cache_aops;
static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp)
{
bool ret = true;
struct address_space *const mapping = folio->mapping;
DBG_BUGON(!folio_test_locked(folio));
DBG_BUGON(mapping->a_ops != &managed_cache_aops);
if (folio_test_private(folio))
ret = erofs_try_to_free_cached_page(&folio->page);
return ret;
}
/*
* It will be called only on inode eviction. In case that there are still some
* decompression requests in progress, wait with rescheduling for a bit here.
* We could introduce an extra locking instead but it seems unnecessary.
*/
static void erofs_managed_cache_invalidate_folio(struct folio *folio,
size_t offset, size_t length)
{
const size_t stop = length + offset;
DBG_BUGON(!folio_test_locked(folio));
/* Check for potential overflow in debug mode */
DBG_BUGON(stop > folio_size(folio) || stop < length);
if (offset == 0 && stop == folio_size(folio))
while (!erofs_managed_cache_release_folio(folio, GFP_NOFS))
cond_resched();
}
static const struct address_space_operations managed_cache_aops = {
.release_folio = erofs_managed_cache_release_folio,
.invalidate_folio = erofs_managed_cache_invalidate_folio,
};
static int erofs_init_managed_cache(struct super_block *sb)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
struct inode *const inode = new_inode(sb);
if (!inode)
return -ENOMEM;
set_nlink(inode, 1);
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &managed_cache_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
sbi->managed_cache = inode;
return 0;
}
#else
static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif
static struct inode *erofs_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
{

File diff suppressed because it is too large Load Diff

View File

@ -101,29 +101,26 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
}
static unsigned int decode_compactedbits(unsigned int lobits,
unsigned int lomask,
u8 *in, unsigned int pos, u8 *type)
{
const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
const unsigned int lo = v & lomask;
const unsigned int lo = v & ((1 << lobits) - 1);
*type = (v >> lobits) & 3;
return lo;
}
static int get_compacted_la_distance(unsigned int lclusterbits,
static int get_compacted_la_distance(unsigned int lobits,
unsigned int encodebits,
unsigned int vcnt, u8 *in, int i)
{
const unsigned int lomask = (1 << lclusterbits) - 1;
unsigned int lo, d1 = 0;
u8 type;
DBG_BUGON(i >= vcnt);
do {
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
lo = decode_compactedbits(lobits, in, encodebits * i, &type);
if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
return d1;
@ -142,15 +139,14 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
{
struct erofs_inode *const vi = EROFS_I(m->inode);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
const unsigned int lomask = (1 << lclusterbits) - 1;
unsigned int vcnt, base, lo, encodebits, nblk, eofs;
unsigned int vcnt, base, lo, lobits, encodebits, nblk, eofs;
int i;
u8 *in, type;
bool big_pcluster;
if (1 << amortizedshift == 4 && lclusterbits <= 14)
vcnt = 2;
else if (1 << amortizedshift == 2 && lclusterbits == 12)
else if (1 << amortizedshift == 2 && lclusterbits <= 12)
vcnt = 16;
else
return -EOPNOTSUPP;
@ -159,6 +155,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
(vcnt << amortizedshift);
big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
lobits = max(lclusterbits, ilog2(Z_EROFS_VLE_DI_D0_CBLKCNT) + 1U);
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
eofs = erofs_blkoff(m->inode->i_sb, pos);
base = round_down(eofs, vcnt << amortizedshift);
@ -166,15 +163,14 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
i = (eofs - base) >> amortizedshift;
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
lo = decode_compactedbits(lobits, in, encodebits * i, &type);
m->type = type;
if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
m->clusterofs = 1 << lclusterbits;
/* figure out lookahead_distance: delta[1] if needed */
if (lookahead)
m->delta[1] = get_compacted_la_distance(lclusterbits,
m->delta[1] = get_compacted_la_distance(lobits,
encodebits, vcnt, in, i);
if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
if (!big_pcluster) {
@ -193,8 +189,8 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
* of which lo saves delta[1] rather than delta[0].
* Hence, get delta[0] by the previous lcluster indirectly.
*/
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * (i - 1), &type);
lo = decode_compactedbits(lobits, in,
encodebits * (i - 1), &type);
if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
lo = 0;
else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT)
@ -209,8 +205,8 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
nblk = 1;
while (i > 0) {
--i;
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
lo = decode_compactedbits(lobits, in,
encodebits * i, &type);
if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
i -= lo;
@ -221,8 +217,8 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
nblk = 0;
while (i > 0) {
--i;
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
lo = decode_compactedbits(lobits, in,
encodebits * i, &type);
if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
--i;

View File

@ -39,6 +39,8 @@
#include <linux/rculist.h>
#include <net/busy_poll.h>
#include <trace/hooks/fs.h>
/*
* LOCKING:
* There are three level of locking required by epoll :
@ -1373,15 +1375,20 @@ static int ep_create_wakeup_source(struct epitem *epi)
{
struct name_snapshot n;
struct wakeup_source *ws;
char ws_name[64];
strlcpy(ws_name, "eventpoll", sizeof(ws_name));
trace_android_vh_ep_create_wakeup_source(ws_name, sizeof(ws_name));
if (!epi->ep->ws) {
epi->ep->ws = wakeup_source_register(NULL, "eventpoll");
epi->ep->ws = wakeup_source_register(NULL, ws_name);
if (!epi->ep->ws)
return -ENOMEM;
}
take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
ws = wakeup_source_register(NULL, n.name.name);
strlcpy(ws_name, n.name.name, sizeof(ws_name));
trace_android_vh_ep_create_wakeup_source(ws_name, sizeof(ws_name));
ws = wakeup_source_register(NULL, ws_name);
release_dentry_name_snapshot(&n);
if (!ws)

View File

@ -2734,7 +2734,9 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
f2fs_update_inode_page(inode);
/* 3: update and set xattr node page dirty */
memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);
if (page)
memcpy(F2FS_NODE(xpage), F2FS_NODE(page),
VALID_XATTR_BLOCK_SIZE);
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);

View File

@ -363,10 +363,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
*xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name);
if (!*xe) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
f2fs_err(F2FS_I_SB(inode), "lookup inode (%lu) has corrupted xattr",
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
err = -EFSCORRUPTED;
err = -ENODATA;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
goto out;
@ -583,13 +583,12 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
(void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
f2fs_err(F2FS_I_SB(inode), "list inode (%lu) has corrupted xattr",
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
goto cleanup;
break;
}
if (!handler || (handler->list && !handler->list(dentry)))
@ -650,7 +649,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (size > MAX_VALUE_LEN(inode))
return -E2BIG;
retry:
error = read_all_xattrs(inode, ipage, &base_addr);
if (error)
return error;
@ -660,7 +659,14 @@ static int __f2fs_setxattr(struct inode *inode, int index,
/* find entry with wanted name. */
here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name);
if (!here) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
if (!F2FS_I(inode)->i_xattr_nid) {
f2fs_notice(F2FS_I_SB(inode),
"recover xattr in inode (%lu)", inode->i_ino);
f2fs_recover_xattr_data(inode, NULL);
kfree(base_addr);
goto retry;
}
f2fs_err(F2FS_I_SB(inode), "set inode (%lu) has corrupted xattr",
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;

View File

@ -1117,7 +1117,6 @@ int fuse_lookup_backing(struct fuse_bpf_args *fa, struct inode *dir,
struct kstat stat;
int err;
/* TODO this will not handle lookups over mount points */
inode_lock_nested(dir_backing_inode, I_MUTEX_PARENT);
backing_entry = lookup_one_len(entry->d_name.name, dir_backing_entry,
strlen(entry->d_name.name));
@ -1136,16 +1135,22 @@ int fuse_lookup_backing(struct fuse_bpf_args *fa, struct inode *dir,
return 0;
}
err = follow_down(&fuse_entry->backing_path);
if (err)
goto err_out;
err = vfs_getattr(&fuse_entry->backing_path, &stat,
STATX_BASIC_STATS, 0);
if (err) {
path_put_init(&fuse_entry->backing_path);
return err;
}
if (err)
goto err_out;
fuse_stat_to_attr(get_fuse_conn(dir),
backing_entry->d_inode, &stat, &feo->attr);
return 0;
err_out:
path_put_init(&fuse_entry->backing_path);
return err;
}
int fuse_handle_backing(struct fuse_entry_bpf *feb, struct inode **backing_inode,

View File

@ -28,6 +28,8 @@
#include <linux/rcupdate.h>
#include <linux/time_namespace.h>
#include <trace/hooks/fs.h>
struct timerfd_ctx {
union {
struct hrtimer tmr;
@ -407,6 +409,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
int ufd;
struct timerfd_ctx *ctx;
char file_name_buf[32];
/* Check the TFD_* constants for consistency. */
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@ -443,7 +446,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
ctx->moffs = ktime_mono_to_real(0);
ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
strlcpy(file_name_buf, "[timerfd]", sizeof(file_name_buf));
trace_android_vh_timerfd_create(file_name_buf, sizeof(file_name_buf));
ufd = anon_inode_getfd(file_name_buf, &timerfd_fops, ctx,
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
if (ufd < 0)
kfree(ctx);

View File

@ -2697,6 +2697,9 @@ enum bpf_text_poke_type {
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *addr1, void *addr2);
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct bpf_prog *new, struct bpf_prog *old);
void *bpf_arch_text_copy(void *dst, void *src, size_t len);
int bpf_arch_text_invalidate(void *dst, size_t len);

View File

@ -357,6 +357,7 @@ struct damon_operations {
* @after_wmarks_check: Called after each schemes' watermarks check.
* @after_sampling: Called after each sampling.
* @after_aggregation: Called after each aggregation.
* @before_damos_apply: Called before applying DAMOS action.
* @before_terminate: Called before terminating the monitoring.
* @private: User private data.
*
@ -385,6 +386,10 @@ struct damon_callback {
int (*after_wmarks_check)(struct damon_ctx *context);
int (*after_sampling)(struct damon_ctx *context);
int (*after_aggregation)(struct damon_ctx *context);
int (*before_damos_apply)(struct damon_ctx *context,
struct damon_target *target,
struct damon_region *region,
struct damos *scheme);
void (*before_terminate)(struct damon_ctx *context);
};
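The new before_damos_apply callback gives a module a veto on each region right before the DAMOS action runs: a non-zero return makes the core skip the action for that region (see the damos_apply_scheme() change later in this merge). A minimal registration sketch follows; the module body, the 2 MiB threshold and the damon_new_ctx() setup are illustrative assumptions, not part of this merge:

/* Hedged sketch: veto DAMOS actions on regions smaller than 2 MiB. */
#include <linux/damon.h>
#include <linux/module.h>
#include <linux/sizes.h>

static int skip_small_regions(struct damon_ctx *ctx, struct damon_target *t,
			      struct damon_region *r, struct damos *s)
{
	/* Any non-zero return tells kdamond not to apply the scheme here. */
	return damon_sz_region(r) < SZ_2M ? -EINVAL : 0;
}

static int __init damos_veto_example_init(void)
{
	struct damon_ctx *ctx = damon_new_ctx();

	if (!ctx)
		return -ENOMEM;
	ctx->callback.before_damos_apply = skip_small_regions;
	/* ... add targets and schemes, then damon_start() as usual ... */
	return 0;
}
module_init(damos_veto_example_init);
MODULE_LICENSE("GPL");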

View File

@ -249,6 +249,8 @@ struct maple_tree {
struct maple_tree name = MTREE_INIT(name, 0)
#define mtree_lock(mt) spin_lock((&(mt)->ma_lock))
#define mtree_lock_nested(mas, subclass) \
spin_lock_nested((&(mt)->ma_lock), subclass)
#define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock))
/*
@ -320,6 +322,9 @@ int mtree_store(struct maple_tree *mt, unsigned long index,
void *entry, gfp_t gfp);
void *mtree_erase(struct maple_tree *mt, unsigned long index);
int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
void mtree_destroy(struct maple_tree *mt);
void __mt_destroy(struct maple_tree *mt);
@ -399,6 +404,8 @@ struct ma_wr_state {
};
#define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock))
#define mas_lock_nested(mas, subclass) \
spin_lock_nested(&((mas)->tree->ma_lock), subclass)
#define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock))
@ -525,6 +532,22 @@ static inline void mas_reset(struct ma_state *mas)
*/
#define mas_for_each(__mas, __entry, __max) \
while (((__entry) = mas_find((__mas), (__max))) != NULL)
/**
* __mas_set_range() - Set up Maple Tree operation state to a sub-range of the
* current location.
* @mas: Maple Tree operation state.
* @start: New start of range in the Maple Tree.
* @last: New end of range in the Maple Tree.
*
* set the internal maple state values to a sub-range.
* Please use mas_set_range() if you do not know where you are in the tree.
*/
static inline void __mas_set_range(struct ma_state *mas, unsigned long start,
unsigned long last)
{
mas->index = start;
mas->last = last;
}
/**
* mas_set_range() - Set up Maple Tree operation state for a different index.
@ -539,8 +562,7 @@ static inline void mas_reset(struct ma_state *mas)
static inline
void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last)
{
mas->index = start;
mas->last = last;
__mas_set_range(mas, start, last);
mas->node = MAS_START;
}
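For orientation, a hedged sketch of how __mas_set_range() is meant to be used relative to mas_set_range(): it only narrows the index/last window of an already-positioned state (the way dup_mmap() below punches out VM_DONTCOPY ranges), while mas_set_range() additionally resets the state to MAS_START and forces the next operation to re-walk from the root. The helper name and arguments are illustrative:

/*
 * Hedged sketch: erase [start, last] using a maple state that has already
 * walked to the right spot, without restarting the walk.
 */
static int erase_range_inplace(struct ma_state *mas, unsigned long start,
			       unsigned long last)
{
	/* Keep the current node; only narrow the operation window. */
	__mas_set_range(mas, start, last);
	return mas_store_gfp(mas, NULL, GFP_KERNEL);
}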

View File

@ -243,7 +243,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
break;
case NFPROTO_BRIDGE:
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
hook_head = rcu_dereference(get_nf_hooks_bridge(net)[hook]);
#endif
break;
default:

View File

@ -188,6 +188,36 @@ struct net {
#endif
} __randomize_layout;
/*
* To work around a KMI issue, hooks_bridge[] could not be
* added to struct netns_nf. Since the only use of netns_nf
* is embedded in struct net, struct ext_net is added to
* contain struct net plus the new field. Users of the new
* field must use get_nf_hooks_bridge() to access the field.
*/
struct ext_net {
struct net net;
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
#endif
ANDROID_VENDOR_DATA(1);
};
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
extern struct net init_net;
extern struct nf_hook_entries **init_nf_hooks_bridgep;
static inline struct nf_hook_entries __rcu **get_nf_hooks_bridge(const struct net *net)
{
struct ext_net *ext_net;
if (net == &init_net)
return init_nf_hooks_bridgep;
ext_net = container_of(net, struct ext_net, net);
return ext_net->hooks_bridge;
}
#endif
#include <linux/seq_file_net.h>
/* Init's network namespace */
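A hedged sketch of how a caller is expected to reach the relocated bridge hook table through the accessor instead of the old net->nf.hooks_bridge field, mirroring the nf_hook() change above; the wrapper function is illustrative, and the caller must be inside an RCU read-side section:

#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
/* Illustrative reader: fetch the bridge hook entries for one hook point. */
static inline const struct nf_hook_entries *
bridge_hook_entries(const struct net *net, unsigned int hook)
{
	/*
	 * get_nf_hooks_bridge() hides whether the table lives in init_net's
	 * out-of-line array or in the struct ext_net wrapper.
	 */
	return rcu_dereference(get_nf_hooks_bridge(net)[hook]);
}
#endif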

View File

@ -22,9 +22,6 @@ struct netns_nf {
#ifdef CONFIG_NETFILTER_FAMILY_ARP
struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS];
#endif
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
unsigned int defrag_ipv4_users;
#endif

View File

@ -42,6 +42,9 @@ DECLARE_HOOK(android_vh_rtmutex_wait_start,
DECLARE_HOOK(android_vh_rtmutex_wait_finish,
TP_PROTO(struct rt_mutex_base *lock),
TP_ARGS(lock));
DECLARE_HOOK(android_vh_rt_mutex_steal,
TP_PROTO(int waiter_prio, int top_waiter_prio, bool *ret),
TP_ARGS(waiter_prio, top_waiter_prio, ret));
DECLARE_HOOK(android_vh_rwsem_read_wait_start,
TP_PROTO(struct rw_semaphore *sem),

23
include/trace/hooks/fs.h Normal file
View File

@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM fs
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH trace/hooks
#if !defined(_TRACE_HOOK_FS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_HOOK_FS_H
#include <trace/hooks/vendor_hooks.h>
DECLARE_HOOK(android_vh_ep_create_wakeup_source,
TP_PROTO(char *name, int len),
TP_ARGS(name, len));
DECLARE_HOOK(android_vh_timerfd_create,
TP_PROTO(char *name, int len),
TP_ARGS(name, len));
#endif /* _TRACE_HOOK_FS_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
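These are regular Android vendor hooks, so a vendor module attaches handlers through the register_trace_android_vh_*() helpers that DECLARE_HOOK() generates; the fs/eventpoll.c and fs/timerfd.c hunks in this merge then use whatever name the handler writes back. The module below is a hedged sketch and the replacement names are made up:

/* Hedged sketch of a vendor module hooking the new fs vendor hooks. */
#include <linux/module.h>
#include <linux/string.h>
#include <trace/hooks/fs.h>

static void vh_ep_name(void *unused, char *name, int len)
{
	/* Tag epoll wakeup sources so they stand out in wakeup stats. */
	strscpy(name, "vendor_epoll", len);
}

static void vh_timerfd_name(void *unused, char *name, int len)
{
	strscpy(name, "[vendor_timerfd]", len);
}

static int __init fs_vh_example_init(void)
{
	int ret;

	ret = register_trace_android_vh_ep_create_wakeup_source(vh_ep_name, NULL);
	if (ret)
		return ret;
	return register_trace_android_vh_timerfd_create(vh_timerfd_name, NULL);
}
module_init(fs_vh_example_init);
MODULE_LICENSE("GPL");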

View File

@ -76,6 +76,9 @@ struct slabinfo;
DECLARE_HOOK(android_vh_cache_show,
TP_PROTO(struct seq_file *m, struct slabinfo *sinfo, struct kmem_cache *s),
TP_ARGS(m, sinfo, s));
DECLARE_HOOK(android_vh_read_pages,
TP_PROTO(struct readahead_control *ractl),
TP_ARGS(ractl));
DECLARE_HOOK(android_vh_alloc_pages_reclaim_bypass,
TP_PROTO(gfp_t gfp_mask, int order, int alloc_flags,
int migratetype, struct page **page),

View File

@ -25,6 +25,13 @@ DECLARE_RESTRICTED_HOOK(android_rvh_sk_alloc,
DECLARE_RESTRICTED_HOOK(android_rvh_sk_free,
TP_PROTO(struct sock *sock), TP_ARGS(sock), 1);
struct poll_table_struct;
typedef struct poll_table_struct poll_table;
DECLARE_HOOK(android_vh_netlink_poll,
TP_PROTO(struct file *file, struct socket *sock, poll_table *wait,
__poll_t *mask),
TP_ARGS(file, sock, wait, mask));
/* macro versions of hooks are no longer required */
#endif /* _TRACE_HOOK_NET_VH_H */

View File

@ -567,7 +567,8 @@
* @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC
* (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID,
* %NL80211_ATTR_FILS_CACHE_ID, and %NL80211_ATTR_PMKID in case of FILS
* authentication.
* authentication. Additionally in case of SAE offload and OWE offloads
* PMKSA entry can be deleted using %NL80211_ATTR_SSID.
* @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries.
*
* @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain

View File

@ -202,6 +202,7 @@ config GKI_HIDDEN_NET_CONFIGS
select PAGE_POOL
select NET_PTP_CLASSIFY
select NET_DEVLINK
select NETFILTER_FAMILY_BRIDGE
help
Dummy config option used to enable the networking hidden
config, required by various SoC platforms.

View File

@ -51,7 +51,6 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
struct seq_file *m)
{
struct io_sq_data *sq = NULL;
struct io_overflow_cqe *ocqe;
struct io_rings *r = ctx->rings;
unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
@ -62,6 +61,7 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
unsigned int cq_shift = 0;
unsigned int sq_shift = 0;
unsigned int sq_entries, cq_entries;
int sq_pid = -1, sq_cpu = -1;
bool has_lock;
unsigned int i;
@ -139,13 +139,19 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
has_lock = mutex_trylock(&ctx->uring_lock);
if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
sq = ctx->sq_data;
if (!sq->thread)
sq = NULL;
struct io_sq_data *sq = ctx->sq_data;
if (mutex_trylock(&sq->lock)) {
if (sq->thread) {
sq_pid = task_pid_nr(sq->thread);
sq_cpu = task_cpu(sq->thread);
}
mutex_unlock(&sq->lock);
}
}
seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
seq_printf(m, "SqThread:\t%d\n", sq_pid);
seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
struct file *f = io_file_from_index(&ctx->file_table, i);

View File

@ -997,11 +997,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map,
mutex_unlock(&aux->poke_mutex);
}
void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct bpf_prog *new, struct bpf_prog *old)
{
WARN_ON_ONCE(1);
}
static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
struct bpf_prog *old,
struct bpf_prog *new)
{
u8 *old_addr, *new_addr, *old_bypass_addr;
struct prog_poke_elem *elem;
struct bpf_array_aux *aux;
@ -1010,7 +1015,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
list_for_each_entry(elem, &aux->poke_progs, list) {
struct bpf_jit_poke_descriptor *poke;
int i, ret;
int i;
for (i = 0; i < elem->aux->size_poke_tab; i++) {
poke = &elem->aux->poke_tab[i];
@ -1029,21 +1034,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
* activated, so tail call updates can arrive from here
* while JIT is still finishing its final fixup for
* non-activated poke entries.
* 3) On program teardown, the program's kallsym entry gets
* removed out of RCU callback, but we can only untrack
* from sleepable context, therefore bpf_arch_text_poke()
* might not see that this is in BPF text section and
* bails out with -EINVAL. As these are unreachable since
* RCU grace period already passed, we simply skip them.
* 4) Also programs reaching refcount of zero while patching
* 3) Also programs reaching refcount of zero while patching
* is in progress is okay since we're protected under
* poke_mutex and untrack the programs before the JIT
* buffer is freed. When we're still in the middle of
* patching and suddenly kallsyms entry of the program
* gets evicted, we just skip the rest which is fine due
* to point 3).
* 5) Any other error happening below from bpf_arch_text_poke()
* is a unexpected bug.
* buffer is freed.
*/
if (!READ_ONCE(poke->tailcall_target_stable))
continue;
@ -1053,39 +1047,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
poke->tail_call.key != key)
continue;
old_bypass_addr = old ? NULL : poke->bypass_addr;
old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
if (new) {
ret = bpf_arch_text_poke(poke->tailcall_target,
BPF_MOD_JUMP,
old_addr, new_addr);
BUG_ON(ret < 0 && ret != -EINVAL);
if (!old) {
ret = bpf_arch_text_poke(poke->tailcall_bypass,
BPF_MOD_JUMP,
poke->bypass_addr,
NULL);
BUG_ON(ret < 0 && ret != -EINVAL);
}
} else {
ret = bpf_arch_text_poke(poke->tailcall_bypass,
BPF_MOD_JUMP,
old_bypass_addr,
poke->bypass_addr);
BUG_ON(ret < 0 && ret != -EINVAL);
/* let other CPUs finish the execution of program
* so that it will not possible to expose them
* to invalid nop, stack unwind, nop state
*/
if (!ret)
synchronize_rcu();
ret = bpf_arch_text_poke(poke->tailcall_target,
BPF_MOD_JUMP,
old_addr, NULL);
BUG_ON(ret < 0 && ret != -EINVAL);
}
bpf_arch_poke_desc_update(poke, new, old);
}
}
}
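With the generic poking sequence deleted, bpf_arch_poke_desc_update() stays a __weak no-op unless an architecture provides a strong definition. The sketch below only illustrates the override pattern by mirroring the sequence removed above; it is not the real arch implementation:

/* arch/<arch>/net/bpf_jit_comp.c -- illustrative strong override. */
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
			       struct bpf_prog *new, struct bpf_prog *old)
{
	u8 *old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
	u8 *new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
	int ret;

	if (new) {
		ret = bpf_arch_text_poke(poke->tailcall_target, BPF_MOD_JUMP,
					 old_addr, new_addr);
		BUG_ON(ret < 0 && ret != -EINVAL);
		if (!old) {
			ret = bpf_arch_text_poke(poke->tailcall_bypass,
						 BPF_MOD_JUMP,
						 poke->bypass_addr, NULL);
			BUG_ON(ret < 0 && ret != -EINVAL);
		}
	} else {
		ret = bpf_arch_text_poke(poke->tailcall_bypass, BPF_MOD_JUMP,
					 NULL, poke->bypass_addr);
		BUG_ON(ret < 0 && ret != -EINVAL);
		/* Let in-flight programs drain before removing the target. */
		if (!ret)
			synchronize_rcu();
		ret = bpf_arch_text_poke(poke->tailcall_target, BPF_MOD_JUMP,
					 old_addr, NULL);
		BUG_ON(ret < 0 && ret != -EINVAL);
	}
}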

View File

@ -43,6 +43,7 @@ u64 dma_direct_get_required_mask(struct device *dev)
return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
EXPORT_SYMBOL_GPL(dma_direct_get_required_mask);
static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
u64 *phys_limit)
@ -320,6 +321,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
__dma_direct_free_pages(dev, page, size);
return NULL;
}
EXPORT_SYMBOL_GPL(dma_direct_alloc);
void dma_direct_free(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
@ -365,6 +367,7 @@ void dma_direct_free(struct device *dev, size_t size,
__dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size);
}
EXPORT_SYMBOL_GPL(dma_direct_free);
struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)

View File

@ -27,6 +27,7 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
return ret;
}
EXPORT_SYMBOL_GPL(dma_common_get_sgtable);
/*
* Create userspace mapping for the DMA-coherent memory.
@ -57,6 +58,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
return -ENXIO;
#endif /* CONFIG_MMU */
}
EXPORT_SYMBOL_GPL(dma_common_mmap);
struct page *dma_common_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)

View File

@ -659,7 +659,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
int retval;
unsigned long charge = 0;
LIST_HEAD(uf);
MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
MA_STATE(mas, &mm->mm_mt, 0, 0);
uprobe_start_dup_mmap();
@ -687,16 +686,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
goto out;
khugepaged_fork(mm, oldmm);
retval = mas_expected_entries(&mas, oldmm->map_count);
if (retval)
/* Use __mt_dup() to efficiently build an identical maple tree. */
retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL);
if (unlikely(retval))
goto out;
mt_clear_in_rcu(mas.tree);
mas_for_each(&old_mas, mpnt, ULONG_MAX) {
mas_for_each(&mas, mpnt, ULONG_MAX) {
struct file *file;
vma_start_write(mpnt);
if (mpnt->vm_flags & VM_DONTCOPY) {
__mas_set_range(&mas, mpnt->vm_start, mpnt->vm_end - 1);
mas_store_gfp(&mas, NULL, GFP_KERNEL);
if (unlikely(mas_is_err(&mas))) {
retval = -ENOMEM;
goto loop_out;
}
vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
continue;
}
@ -758,12 +764,13 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
if (is_vm_hugetlb_page(tmp))
hugetlb_dup_vma_private(tmp);
/* Link the vma into the MT */
/*
* Link the vma into the MT. After using __mt_dup(), memory
* allocation is not necessary here, so it cannot fail.
*/
mas.index = tmp->vm_start;
mas.last = tmp->vm_end - 1;
mas_store(&mas, tmp);
if (mas_is_err(&mas))
goto fail_nomem_mas_store;
mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
@ -772,15 +779,28 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
if (retval)
if (retval) {
mpnt = mas_find(&mas, ULONG_MAX);
goto loop_out;
}
}
/* a new mm has just been created */
retval = arch_dup_mmap(oldmm, mm);
loop_out:
mas_destroy(&mas);
if (!retval)
if (!retval) {
mt_set_in_rcu(mas.tree);
} else if (mpnt) {
/*
* The entire maple tree has already been duplicated. If the
* mmap duplication fails, mark the failure point with
* XA_ZERO_ENTRY. In exit_mmap(), if this marker is encountered,
* stop releasing VMAs that have not been duplicated after this
* point.
*/
mas_set_range(&mas, mpnt->vm_start, mpnt->vm_end - 1);
mas_store(&mas, XA_ZERO_ENTRY);
}
out:
mmap_write_unlock(mm);
flush_tlb_mm(oldmm);
@ -790,8 +810,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
uprobe_end_dup_mmap();
return retval;
fail_nomem_mas_store:
unlink_anon_vmas(tmp);
fail_nomem_anon_vma_fork:
mpol_put(vma_policy(tmp));
fail_nomem_policy:

View File

@ -391,9 +391,15 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
struct rt_mutex_waiter *top_waiter)
{
bool ret = false;
if (rt_mutex_waiter_less(waiter, top_waiter))
return true;
trace_android_vh_rt_mutex_steal(waiter->prio, top_waiter->prio, &ret);
if (ret)
return true;
#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
* Note that RT tasks are excluded from same priority (lateral)

View File

@ -7837,6 +7837,7 @@ static int __sched_setscheduler(struct task_struct *p,
if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
__setscheduler_params(p, attr);
__setscheduler_prio(p, newprio);
trace_android_rvh_setscheduler(p);
}
__setscheduler_uclamp(p, attr);

View File

@ -96,6 +96,7 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
unsigned int sysctl_sched_min_granularity = 750000ULL;
EXPORT_SYMBOL_GPL(sysctl_sched_min_granularity);
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
/*
@ -105,6 +106,7 @@ static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
* (default: 0.75 msec)
*/
unsigned int sysctl_sched_idle_min_granularity = 750000ULL;
EXPORT_SYMBOL_GPL(sysctl_sched_idle_min_granularity);
/*
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity

View File

@ -4,6 +4,8 @@
* Copyright (c) 2018-2022 Oracle Corporation
* Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
* Matthew Wilcox <willy@infradead.org>
* Copyright (c) 2023 ByteDance
* Author: Peng Zhang <zhangpeng.00@bytedance.com>
*/
/*
@ -158,6 +160,11 @@ static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
}
static inline void mt_free_one(struct maple_node *node)
{
kmem_cache_free(maple_node_cache, node);
}
static inline void mt_free_bulk(size_t size, void __rcu **nodes)
{
kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
@ -199,6 +206,11 @@ static unsigned int mas_mt_height(struct ma_state *mas)
return mt_height(mas->tree);
}
static inline unsigned int mt_attr(struct maple_tree *mt)
{
return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
}
static inline enum maple_type mte_node_type(const struct maple_enode *entry)
{
return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
@ -5702,7 +5714,7 @@ void mas_destroy(struct ma_state *mas)
mt_free_bulk(count, (void __rcu **)&node->slot[1]);
total -= count;
}
kmem_cache_free(maple_node_cache, node);
mt_free_one(ma_mnode_ptr(node));
total--;
}
@ -6527,6 +6539,278 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index)
}
EXPORT_SYMBOL(mtree_erase);
/*
* mas_dup_free() - Free an incomplete duplication of a tree.
* @mas: The maple state of an incomplete tree.
*
* The parameter @mas->node passed in indicates that the allocation failed on
* this node. This function frees all nodes starting from @mas->node in the
* reverse order of mas_dup_build(). There is no need to hold the source tree
* lock at this time.
*/
static void mas_dup_free(struct ma_state *mas)
{
struct maple_node *node;
enum maple_type type;
void __rcu **slots;
unsigned char count, i;
/* Maybe the first node allocation failed. */
if (mas_is_none(mas))
return;
while (!mte_is_root(mas->node)) {
mas_ascend(mas);
if (mas->offset) {
mas->offset--;
do {
mas_descend(mas);
mas->offset = mas_data_end(mas);
} while (!mte_is_leaf(mas->node));
mas_ascend(mas);
}
node = mte_to_node(mas->node);
type = mte_node_type(mas->node);
slots = ma_slots(node, type);
count = mas_data_end(mas) + 1;
for (i = 0; i < count; i++)
((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
mt_free_bulk(count, slots);
}
node = mte_to_node(mas->node);
mt_free_one(node);
}
/*
* mas_copy_node() - Copy a maple node and replace the parent.
* @mas: The maple state of source tree.
* @new_mas: The maple state of new tree.
* @parent: The parent of the new node.
*
* Copy @mas->node to @new_mas->node, set @parent to be the parent of
* @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM.
*/
static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas,
struct maple_pnode *parent)
{
struct maple_node *node = mte_to_node(mas->node);
struct maple_node *new_node = mte_to_node(new_mas->node);
unsigned long val;
/* Copy the node completely. */
memcpy(new_node, node, sizeof(struct maple_node));
/* Update the parent node pointer. */
val = (unsigned long)node->parent & MAPLE_NODE_MASK;
new_node->parent = ma_parent_ptr(val | (unsigned long)parent);
}
/*
* mas_dup_alloc() - Allocate child nodes for a maple node.
* @mas: The maple state of source tree.
* @new_mas: The maple state of new tree.
* @gfp: The GFP_FLAGS to use for allocations.
*
* This function allocates child nodes for @new_mas->node during the duplication
* process. If memory allocation fails, @mas is set to -ENOMEM.
*/
static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
gfp_t gfp)
{
struct maple_node *node = mte_to_node(mas->node);
struct maple_node *new_node = mte_to_node(new_mas->node);
enum maple_type type;
unsigned char request, count, i;
void __rcu **slots;
void __rcu **new_slots;
unsigned long val;
/* Allocate memory for child nodes. */
type = mte_node_type(mas->node);
new_slots = ma_slots(new_node, type);
request = mas_data_end(mas) + 1;
count = mt_alloc_bulk(gfp, request, (void **)new_slots);
if (unlikely(count < request)) {
memset(new_slots, 0, request * sizeof(void *));
mas_set_err(mas, -ENOMEM);
return;
}
/* Restore node type information in slots. */
slots = ma_slots(node, type);
for (i = 0; i < count; i++) {
val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
val &= MAPLE_NODE_MASK;
((unsigned long *)new_slots)[i] |= val;
}
}
/*
* mas_dup_build() - Build a new maple tree from a source tree
* @mas: The maple state of source tree, need to be in MAS_START state.
* @new_mas: The maple state of new tree, need to be in MAS_START state.
* @gfp: The GFP_FLAGS to use for allocations.
*
* This function builds a new tree in DFS preorder. If the memory allocation
* fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the
* last node. mas_dup_free() will free the incomplete duplication of a tree.
*
* Note that the attributes of the two trees need to be exactly the same, and the
* new tree needs to be empty, otherwise -EINVAL will be set in @mas.
*/
static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
gfp_t gfp)
{
struct maple_node *node;
struct maple_pnode *parent = NULL;
struct maple_enode *root;
enum maple_type type;
if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) ||
unlikely(!mtree_empty(new_mas->tree))) {
mas_set_err(mas, -EINVAL);
return;
}
root = mas_start(mas);
if (mas_is_ptr(mas) || mas_is_none(mas))
goto set_new_tree;
node = mt_alloc_one(gfp);
if (!node) {
new_mas->node = MAS_NONE;
mas_set_err(mas, -ENOMEM);
return;
}
type = mte_node_type(mas->node);
root = mt_mk_node(node, type);
new_mas->node = root;
new_mas->min = 0;
new_mas->max = ULONG_MAX;
root = mte_mk_root(root);
while (1) {
mas_copy_node(mas, new_mas, parent);
if (!mte_is_leaf(mas->node)) {
/* Only allocate child nodes for non-leaf nodes. */
mas_dup_alloc(mas, new_mas, gfp);
if (unlikely(mas_is_err(mas)))
return;
} else {
/*
* This is the last leaf node and duplication is
* completed.
*/
if (mas->max == ULONG_MAX)
goto done;
/* This is not the last leaf node and needs to go up. */
do {
mas_ascend(mas);
mas_ascend(new_mas);
} while (mas->offset == mas_data_end(mas));
/* Move to the next subtree. */
mas->offset++;
new_mas->offset++;
}
mas_descend(mas);
parent = ma_parent_ptr(mte_to_node(new_mas->node));
mas_descend(new_mas);
mas->offset = 0;
new_mas->offset = 0;
}
done:
/* Specially handle the parent of the root node. */
mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas));
set_new_tree:
/* Make them the same height */
new_mas->tree->ma_flags = mas->tree->ma_flags;
rcu_assign_pointer(new_mas->tree->ma_root, root);
}
/**
* __mt_dup(): Duplicate an entire maple tree
* @mt: The source maple tree
* @new: The new maple tree
* @gfp: The GFP_FLAGS to use for allocations
*
* This function duplicates a maple tree in Depth-First Search (DFS) pre-order
* traversal. It uses memcpy() to copy nodes in the source tree and allocate
* new child nodes in non-leaf nodes. The new node is exactly the same as the
* source node except for all the addresses stored in it. It will be faster than
* traversing all elements in the source tree and inserting them one by one into
* the new tree.
* The user needs to ensure that the attributes of the source tree and the new
* tree are the same, and the new tree needs to be an empty tree, otherwise
* -EINVAL will be returned.
* Note that the user needs to manually lock the source tree and the new tree.
*
* Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
* the attributes of the two trees are different or the new tree is not an empty
* tree.
*/
int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
{
int ret = 0;
MA_STATE(mas, mt, 0, 0);
MA_STATE(new_mas, new, 0, 0);
mas_dup_build(&mas, &new_mas, gfp);
if (unlikely(mas_is_err(&mas))) {
ret = xa_err(mas.node);
if (ret == -ENOMEM)
mas_dup_free(&new_mas);
}
return ret;
}
EXPORT_SYMBOL(__mt_dup);
/**
* mtree_dup(): Duplicate an entire maple tree
* @mt: The source maple tree
* @new: The new maple tree
* @gfp: The GFP_FLAGS to use for allocations
*
* This function duplicates a maple tree in Depth-First Search (DFS) pre-order
* traversal. It uses memcpy() to copy nodes in the source tree and allocate
* new child nodes in non-leaf nodes. The new node is exactly the same as the
* source node except for all the addresses stored in it. It will be faster than
* traversing all elements in the source tree and inserting them one by one into
* the new tree.
* The user needs to ensure that the attributes of the source tree and the new
* tree are the same, and the new tree needs to be an empty tree, otherwise
* -EINVAL will be returned.
*
* Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
* the attributes of the two trees are different or the new tree is not an empty
* tree.
*/
int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
{
int ret = 0;
MA_STATE(mas, mt, 0, 0);
MA_STATE(new_mas, new, 0, 0);
mas_lock(&new_mas);
mas_lock_nested(&mas, SINGLE_DEPTH_NESTING);
mas_dup_build(&mas, &new_mas, gfp);
mas_unlock(&mas);
if (unlikely(mas_is_err(&mas))) {
ret = xa_err(mas.node);
if (ret == -ENOMEM)
mas_dup_free(&new_mas);
}
mas_unlock(&new_mas);
return ret;
}
EXPORT_SYMBOL(mtree_dup);
/**
* __mt_destroy() - Walk and free all nodes of a locked maple tree.
* @mt: The maple tree
@ -6541,7 +6825,7 @@ void __mt_destroy(struct maple_tree *mt)
if (xa_is_node(root))
mte_destroy_walk(root, mt);
mt->ma_flags = 0;
mt->ma_flags = mt_attr(mt);
}
EXPORT_SYMBOL_GPL(__mt_destroy);
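For reference, a hedged usage sketch of the two new entry points: mtree_dup() takes and nests both internal tree locks itself, while __mt_dup() expects the caller to hold the locks (which is why dup_mmap() and the rewritten check_forking()/bench_forking() tests below pair it with external locks). The function, flags and error handling here are illustrative:

/* Illustrative: clone a populated tree the fast way. */
static int clone_tree_example(struct maple_tree *src)
{
	struct maple_tree dst;
	int ret;

	/* The new tree must be empty and share the source tree's attributes. */
	mt_init_flags(&dst, MT_FLAGS_ALLOC_RANGE);

	ret = mtree_dup(src, &dst, GFP_KERNEL);
	if (ret)
		return ret;	/* -ENOMEM or -EINVAL; partial copies are freed */

	/* ... use dst ... */
	mtree_destroy(&dst);
	return 0;
}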

View File

@ -1671,47 +1671,48 @@ static noinline void __init bench_mt_for_each(struct maple_tree *mt)
#endif
/* check_forking - simulate the kernel forking sequence with the tree. */
static noinline void __init check_forking(struct maple_tree *mt)
static noinline void __init check_forking(void)
{
struct maple_tree newmt;
int i, nr_entries = 134;
struct maple_tree mt, newmt;
int i, nr_entries = 134, ret;
void *val;
MA_STATE(mas, mt, 0, 0);
MA_STATE(newmas, mt, 0, 0);
struct rw_semaphore newmt_lock;
MA_STATE(mas, &mt, 0, 0);
MA_STATE(newmas, &newmt, 0, 0);
struct rw_semaphore mt_lock, newmt_lock;
init_rwsem(&mt_lock);
init_rwsem(&newmt_lock);
for (i = 0; i <= nr_entries; i++)
mtree_store_range(mt, i*10, i*10 + 5,
xa_mk_value(i), GFP_KERNEL);
mt_init_flags(&mt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN);
mt_set_external_lock(&mt, &mt_lock);
mt_set_non_kernel(99999);
mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN);
mt_set_external_lock(&newmt, &newmt_lock);
newmas.tree = &newmt;
mas_reset(&newmas);
mas_reset(&mas);
down_write(&newmt_lock);
mas.index = 0;
mas.last = 0;
if (mas_expected_entries(&newmas, nr_entries)) {
down_write(&mt_lock);
for (i = 0; i <= nr_entries; i++) {
mas_set_range(&mas, i*10, i*10 + 5);
mas_store_gfp(&mas, xa_mk_value(i), GFP_KERNEL);
}
down_write_nested(&newmt_lock, SINGLE_DEPTH_NESTING);
ret = __mt_dup(&mt, &newmt, GFP_KERNEL);
if (ret) {
pr_err("OOM!");
BUG_ON(1);
}
rcu_read_lock();
mas_for_each(&mas, val, ULONG_MAX) {
newmas.index = mas.index;
newmas.last = mas.last;
mas_set(&newmas, 0);
mas_for_each(&newmas, val, ULONG_MAX)
mas_store(&newmas, val);
}
rcu_read_unlock();
mas_destroy(&newmas);
mas_destroy(&mas);
mt_validate(&newmt);
mt_set_non_kernel(0);
__mt_destroy(&newmt);
__mt_destroy(&mt);
up_write(&newmt_lock);
up_write(&mt_lock);
}
static noinline void __init check_iteration(struct maple_tree *mt)
@ -1815,49 +1816,51 @@ static noinline void __init check_mas_store_gfp(struct maple_tree *mt)
}
#if defined(BENCH_FORK)
static noinline void __init bench_forking(struct maple_tree *mt)
static noinline void __init bench_forking(void)
{
struct maple_tree newmt;
int i, nr_entries = 134, nr_fork = 80000;
struct maple_tree mt, newmt;
int i, nr_entries = 134, nr_fork = 80000, ret;
void *val;
MA_STATE(mas, mt, 0, 0);
MA_STATE(newmas, mt, 0, 0);
struct rw_semaphore newmt_lock;
MA_STATE(mas, &mt, 0, 0);
MA_STATE(newmas, &newmt, 0, 0);
struct rw_semaphore mt_lock, newmt_lock;
init_rwsem(&mt_lock);
init_rwsem(&newmt_lock);
mt_set_external_lock(&newmt, &newmt_lock);
for (i = 0; i <= nr_entries; i++)
mtree_store_range(mt, i*10, i*10 + 5,
xa_mk_value(i), GFP_KERNEL);
mt_init_flags(&mt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN);
mt_set_external_lock(&mt, &mt_lock);
down_write(&mt_lock);
for (i = 0; i <= nr_entries; i++) {
mas_set_range(&mas, i*10, i*10 + 5);
mas_store_gfp(&mas, xa_mk_value(i), GFP_KERNEL);
}
for (i = 0; i < nr_fork; i++) {
mt_set_non_kernel(99999);
mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE);
newmas.tree = &newmt;
mas_reset(&newmas);
mas_reset(&mas);
mas.index = 0;
mas.last = 0;
rcu_read_lock();
down_write(&newmt_lock);
if (mas_expected_entries(&newmas, nr_entries)) {
printk("OOM!");
mt_init_flags(&newmt,
MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN);
mt_set_external_lock(&newmt, &newmt_lock);
down_write_nested(&newmt_lock, SINGLE_DEPTH_NESTING);
ret = __mt_dup(&mt, &newmt, GFP_KERNEL);
if (ret) {
pr_err("OOM!");
BUG_ON(1);
}
mas_for_each(&mas, val, ULONG_MAX) {
newmas.index = mas.index;
newmas.last = mas.last;
mas_set(&newmas, 0);
mas_for_each(&newmas, val, ULONG_MAX)
mas_store(&newmas, val);
}
mas_destroy(&newmas);
rcu_read_unlock();
mt_validate(&newmt);
mt_set_non_kernel(0);
__mt_destroy(&newmt);
up_write(&newmt_lock);
}
mas_destroy(&mas);
__mt_destroy(&mt);
up_write(&mt_lock);
}
#endif
@ -2741,10 +2744,6 @@ static int __init maple_tree_seed(void)
pr_info("\nTEST STARTING\n\n");
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_root_expand(&tree);
mtree_destroy(&tree);
#if defined(BENCH_SLOT_STORE)
#define BENCH
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
@ -2775,9 +2774,7 @@ static int __init maple_tree_seed(void)
#endif
#if defined(BENCH_FORK)
#define BENCH
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
bench_forking(&tree);
mtree_destroy(&tree);
bench_forking();
goto skip;
#endif
#if defined(BENCH_MT_FOR_EACH)
@ -2789,13 +2786,15 @@ static int __init maple_tree_seed(void)
#endif
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_iteration(&tree);
check_root_expand(&tree);
mtree_destroy(&tree);
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_forking(&tree);
check_iteration(&tree);
mtree_destroy(&tree);
check_forking();
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_mas_store_gfp(&tree);
mtree_destroy(&tree);

View File

@ -438,6 +438,9 @@ struct page *__cma_alloc(struct cma *cma, unsigned long count,
int ret = -ENOMEM;
int num_attempts = 0;
int max_retries = 5;
const char *name = cma ? cma->name : NULL;
trace_cma_alloc_start(name, count, align);
if (WARN_ON_ONCE((gfp_mask & GFP_KERNEL) == 0 ||
(gfp_mask & ~(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY)) != 0))
@ -452,8 +455,6 @@ struct page *__cma_alloc(struct cma *cma, unsigned long count,
if (!count)
goto out;
trace_cma_alloc_start(cma->name, count, align);
mask = cma_bitmap_aligned_mask(cma, align);
offset = cma_bitmap_aligned_offset(cma, align);
bitmap_maxno = cma_bitmap_maxno(cma);
@ -522,8 +523,6 @@ struct page *__cma_alloc(struct cma *cma, unsigned long count,
start = bitmap_no + mask + 1;
}
trace_cma_alloc_finish(cma->name, pfn, page, count, align);
/*
* CMA can allocate multiple page blocks, which results in different
* blocks being marked with different tags. Reset the tags to ignore
@ -542,6 +541,7 @@ struct page *__cma_alloc(struct cma *cma, unsigned long count,
pr_debug("%s(): returned %p\n", __func__, page);
out:
trace_cma_alloc_finish(name, pfn, page, count, align);
if (page) {
count_vm_event(CMA_ALLOC_SUCCESS);
cma_sysfs_account_success_pages(cma, count);

View File

@ -3,7 +3,7 @@
obj-y := core.o
obj-$(CONFIG_DAMON_VADDR) += ops-common.o vaddr.o
obj-$(CONFIG_DAMON_PADDR) += ops-common.o paddr.o
obj-$(CONFIG_DAMON_SYSFS) += sysfs.o
obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs-schemes.o sysfs.o
obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o
obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o
obj-$(CONFIG_DAMON_LRU_SORT) += lru_sort.o
obj-$(CONFIG_DAMON_RECLAIM) += modules-common.o reclaim.o
obj-$(CONFIG_DAMON_LRU_SORT) += modules-common.o lru_sort.o

View File

@ -694,63 +694,88 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}
static void damon_do_apply_schemes(struct damon_ctx *c,
struct damon_target *t,
struct damon_region *r)
/*
* damos_skip_charged_region() - Check if the given region or starting part of
* it is already charged for the DAMOS quota.
* @t: The target of the region.
* @rp: The pointer to the region.
* @s: The scheme to be applied.
*
* If a quota of a scheme has exceeded in a quota charge window, the scheme's
* action would be applied to only a part of the target access pattern fulfilling
* regions. To avoid applying the scheme action to only already applied
* regions, DAMON skips applying the scheme action to the regions that charged
* in the previous charge window.
*
* This function checks if a given region should be skipped or not for the
* reason. If only the starting part of the region has previously charged,
* this function splits the region into two so that the second one covers the
* area that was not charged in the previous charge window and saves the second
* region in *rp and returns false, so that the caller can apply DAMON action
* to the second one.
*
* Return: true if the region should be entirely skipped, false otherwise.
*/
static bool damos_skip_charged_region(struct damon_target *t,
struct damon_region **rp, struct damos *s)
{
struct damos *s;
damon_for_each_scheme(s, c) {
struct damon_region *r = *rp;
struct damos_quota *quota = &s->quota;
unsigned long sz = damon_sz_region(r);
struct timespec64 begin, end;
unsigned long sz_applied = 0;
if (!s->wmarks.activated)
continue;
/* Check the quota */
if (quota->esz && quota->charged_sz >= quota->esz)
continue;
unsigned long sz_to_skip;
/* Skip previously charged regions */
if (quota->charge_target_from) {
if (t != quota->charge_target_from)
continue;
return true;
if (r == damon_last_region(t)) {
quota->charge_target_from = NULL;
quota->charge_addr_from = 0;
continue;
return true;
}
if (quota->charge_addr_from &&
r->ar.end <= quota->charge_addr_from)
continue;
return true;
if (quota->charge_addr_from && r->ar.start <
quota->charge_addr_from) {
sz = ALIGN_DOWN(quota->charge_addr_from -
sz_to_skip = ALIGN_DOWN(quota->charge_addr_from -
r->ar.start, DAMON_MIN_REGION);
if (!sz) {
if (damon_sz_region(r) <=
DAMON_MIN_REGION)
continue;
sz = DAMON_MIN_REGION;
if (!sz_to_skip) {
if (damon_sz_region(r) <= DAMON_MIN_REGION)
return true;
sz_to_skip = DAMON_MIN_REGION;
}
damon_split_region_at(t, r, sz);
damon_split_region_at(t, r, sz_to_skip);
r = damon_next_region(r);
sz = damon_sz_region(r);
*rp = r;
}
quota->charge_target_from = NULL;
quota->charge_addr_from = 0;
}
return false;
}
if (!damos_valid_target(c, t, r, s))
continue;
static void damos_update_stat(struct damos *s,
unsigned long sz_tried, unsigned long sz_applied)
{
s->stat.nr_tried++;
s->stat.sz_tried += sz_tried;
if (sz_applied)
s->stat.nr_applied++;
s->stat.sz_applied += sz_applied;
}
static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
struct damon_region *r, struct damos *s)
{
struct damos_quota *quota = &s->quota;
unsigned long sz = damon_sz_region(r);
struct timespec64 begin, end;
unsigned long sz_applied = 0;
int err = 0;
/* Apply the scheme */
if (c->ops.apply_scheme) {
if (quota->esz &&
quota->charged_sz + sz > quota->esz) {
if (quota->esz && quota->charged_sz + sz > quota->esz) {
sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
DAMON_MIN_REGION);
if (!sz)
@ -758,6 +783,9 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
damon_split_region_at(t, r, sz);
}
ktime_get_coarse_ts64(&begin);
if (c->callback.before_damos_apply)
err = c->callback.before_damos_apply(c, t, r, s);
if (!err)
sz_applied = c->ops.apply_scheme(c, t, r, s);
ktime_get_coarse_ts64(&end);
quota->total_charged_ns += timespec64_to_ns(&end) -
@ -772,11 +800,32 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
r->age = 0;
update_stat:
s->stat.nr_tried++;
s->stat.sz_tried += sz;
if (sz_applied)
s->stat.nr_applied++;
s->stat.sz_applied += sz_applied;
damos_update_stat(s, sz, sz_applied);
}
static void damon_do_apply_schemes(struct damon_ctx *c,
struct damon_target *t,
struct damon_region *r)
{
struct damos *s;
damon_for_each_scheme(s, c) {
struct damos_quota *quota = &s->quota;
if (!s->wmarks.activated)
continue;
/* Check the quota */
if (quota->esz && quota->charged_sz >= quota->esz)
continue;
if (damos_skip_charged_region(t, &r, s))
continue;
if (!damos_valid_target(c, t, r, s))
continue;
damos_apply_scheme(c, t, r, s);
}
}
@ -803,27 +852,20 @@ static void damos_set_effective_quota(struct damos_quota *quota)
quota->esz = esz;
}
static void kdamond_apply_schemes(struct damon_ctx *c)
static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
{
struct damon_target *t;
struct damon_region *r, *next_r;
struct damos *s;
damon_for_each_scheme(s, c) {
struct damos_quota *quota = &s->quota;
struct damon_target *t;
struct damon_region *r;
unsigned long cumulated_sz;
unsigned int score, max_score = 0;
if (!s->wmarks.activated)
continue;
if (!quota->ms && !quota->sz)
continue;
return;
/* New charge window starts */
if (time_after_eq(jiffies, quota->charged_from +
msecs_to_jiffies(
quota->reset_interval))) {
msecs_to_jiffies(quota->reset_interval))) {
if (quota->esz && quota->charged_sz >= quota->esz)
s->stat.qt_exceeds++;
quota->total_charged_sz += quota->charged_sz;
@ -833,7 +875,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
}
if (!c->ops.get_scheme_score)
continue;
return;
/* Fill up the score histogram */
memset(quota->histogram, 0, sizeof(quota->histogram));
@ -841,8 +883,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
damon_for_each_region(r, t) {
if (!__damos_valid_target(r, s))
continue;
score = c->ops.get_scheme_score(
c, t, r, s);
score = c->ops.get_scheme_score(c, t, r, s);
quota->histogram[score] += damon_sz_region(r);
if (score > max_score)
max_score = score;
@ -856,6 +897,19 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
break;
}
quota->min_score = score;
}
static void kdamond_apply_schemes(struct damon_ctx *c)
{
struct damon_target *t;
struct damon_region *r, *next_r;
struct damos *s;
damon_for_each_scheme(s, c) {
if (!s->wmarks.activated)
continue;
damos_adjust_quota(c, s);
}
damon_for_each_target(t, c) {
@ -1176,6 +1230,7 @@ static int kdamond_fn(void *data)
if (ctx->callback.after_aggregation &&
ctx->callback.after_aggregation(ctx))
break;
if (!list_empty(&ctx->schemes))
kdamond_apply_schemes(ctx);
kdamond_reset_aggregated(ctx);
kdamond_split_regions(ctx);
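The refactoring above splits kdamond_apply_schemes() into damos_skip_charged_region(), damos_apply_scheme(), damos_update_stat(), and damos_adjust_quota() without changing behavior. As a quick illustration of the charge-skipping arithmetic that damos_skip_charged_region() keeps, here is a minimal userspace sketch; DAMON_MIN_REGION and the sample addresses are illustrative assumptions, not values taken from a running kernel:

/* Userspace-only sketch of the prefix-skipping split in damos_skip_charged_region(). */
#include <stdio.h>

#define DAMON_MIN_REGION	4096UL
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

struct region { unsigned long start, end; };

/*
 * Returns 1 if the whole region was already charged and should be skipped,
 * 0 if only the charged prefix is dropped (then *r is shrunk to the
 * uncharged tail the caller may still apply the scheme to).
 */
static int skip_charged_prefix(struct region *r, unsigned long charge_addr_from)
{
	unsigned long sz_to_skip;

	if (r->end <= charge_addr_from)
		return 1;			/* entirely charged already */
	if (r->start >= charge_addr_from)
		return 0;			/* nothing of it was charged */

	sz_to_skip = ALIGN_DOWN(charge_addr_from - r->start, DAMON_MIN_REGION);
	if (!sz_to_skip) {
		if (r->end - r->start <= DAMON_MIN_REGION)
			return 1;		/* too small to split */
		sz_to_skip = DAMON_MIN_REGION;
	}
	r->start += sz_to_skip;			/* keep only the uncharged tail */
	return 0;
}

int main(void)
{
	struct region r = { 0x100000, 0x140000 };

	if (!skip_charged_prefix(&r, 0x121000))
		printf("apply the scheme to [%#lx, %#lx)\n", r.start, r.end);
	return 0;
}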

View File

@ -8,10 +8,8 @@
#define pr_fmt(fmt) "damon-lru-sort: " fmt
#include <linux/damon.h>
#include <linux/ioport.h>
#include <linux/kstrtox.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include "modules-common.h"
@ -237,38 +235,31 @@ static int damon_lru_sort_turn(bool on)
return 0;
}
static struct delayed_work damon_lru_sort_timer;
static void damon_lru_sort_timer_fn(struct work_struct *work)
{
static bool last_enabled;
bool now_enabled;
now_enabled = enabled;
if (last_enabled != now_enabled) {
if (!damon_lru_sort_turn(now_enabled))
last_enabled = now_enabled;
else
enabled = last_enabled;
}
}
static DECLARE_DELAYED_WORK(damon_lru_sort_timer, damon_lru_sort_timer_fn);
static bool damon_lru_sort_initialized;
static int damon_lru_sort_enabled_store(const char *val,
const struct kernel_param *kp)
{
int rc = param_set_bool(val, kp);
bool is_enabled = enabled;
bool enable;
int err;
if (rc < 0)
return rc;
if (!damon_lru_sort_initialized)
return rc;
schedule_delayed_work(&damon_lru_sort_timer, 0);
err = kstrtobool(val, &enable);
if (err)
return err;
if (is_enabled == enable)
return 0;
/* Called before init function. The function will handle this. */
if (!ctx)
goto set_param_out;
err = damon_lru_sort_turn(enable);
if (err)
return err;
set_param_out:
enabled = enable;
return err;
}
static const struct kernel_param_ops enabled_param_ops = {
@ -314,29 +305,19 @@ static int damon_lru_sort_after_wmarks_check(struct damon_ctx *c)
static int __init damon_lru_sort_init(void)
{
ctx = damon_new_ctx();
if (!ctx)
return -ENOMEM;
int err = damon_modules_new_paddr_ctx_target(&ctx, &target);
if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
damon_destroy_ctx(ctx);
return -EINVAL;
}
if (err)
return err;
ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check;
ctx->callback.after_aggregation = damon_lru_sort_after_aggregation;
target = damon_new_target();
if (!target) {
damon_destroy_ctx(ctx);
return -ENOMEM;
}
damon_add_target(ctx, target);
/* 'enabled' has been set before this function, probably via command line */
if (enabled)
err = damon_lru_sort_turn(true);
schedule_delayed_work(&damon_lru_sort_timer, 0);
damon_lru_sort_initialized = true;
return 0;
return err;
}
module_init(damon_lru_sort_init);

mm/damon/modules-common.c (new file, 42 lines)
View File

@ -0,0 +1,42 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Common Primitives for DAMON Modules
*
* Author: SeongJae Park <sjpark@amazon.de>
*/
#include <linux/damon.h>
#include "modules-common.h"
/*
* Allocate, set, and return a DAMON context for the physical address space.
* @ctxp: Pointer to save the pointer to the newly created context
* @targetp: Pointer to save the pointer to the newly created target
*/
int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp,
struct damon_target **targetp)
{
struct damon_ctx *ctx;
struct damon_target *target;
ctx = damon_new_ctx();
if (!ctx)
return -ENOMEM;
if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
damon_destroy_ctx(ctx);
return -EINVAL;
}
target = damon_new_target();
if (!target) {
damon_destroy_ctx(ctx);
return -ENOMEM;
}
damon_add_target(ctx, target);
*ctxp = ctx;
*targetp = target;
return 0;
}
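The lru_sort and reclaim hunks below switch their init paths to this helper. A hedged sketch of how a hypothetical DAMON module would consume it (the module name and callback wiring are made up for illustration and simply mirror the pattern shown below):

/* Hypothetical module init; mirrors the lru_sort/reclaim pattern shown below. */
#include <linux/damon.h>
#include <linux/module.h>

#include "modules-common.h"

static struct damon_ctx *ctx;
static struct damon_target *target;

static int __init damon_sample_init(void)
{
	int err = damon_modules_new_paddr_ctx_target(&ctx, &target);

	if (err)
		return err;
	/* wire module-specific callbacks/schemes here, then turn the context on */
	return 0;
}
module_init(damon_sample_init);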

View File

@ -44,3 +44,6 @@
0400); \
module_param_named(nr_##qt_exceed_name, stat.qt_exceeds, ulong, \
0400);
int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp,
struct damon_target **targetp);

View File

@ -35,21 +35,12 @@ struct page *damon_get_page(unsigned long pfn)
void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
{
bool referenced = false;
struct page *page = damon_get_page(pte_pfn(*pte));
if (!page)
return;
if (ptep_test_and_clear_young(vma, addr, pte))
referenced = true;
#ifdef CONFIG_MMU_NOTIFIER
if (mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE))
referenced = true;
#endif /* CONFIG_MMU_NOTIFIER */
if (referenced)
if (ptep_clear_young_notify(vma, addr, pte))
set_page_young(page);
set_page_idle(page);
@ -59,21 +50,12 @@ void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr
void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
bool referenced = false;
struct page *page = damon_get_page(pmd_pfn(*pmd));
if (!page)
return;
if (pmdp_test_and_clear_young(vma, addr, pmd))
referenced = true;
#ifdef CONFIG_MMU_NOTIFIER
if (mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE))
referenced = true;
#endif /* CONFIG_MMU_NOTIFIER */
if (referenced)
if (pmdp_clear_young_notify(vma, addr, pmd))
set_page_young(page);
set_page_idle(page);
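Both hunks above drop the open-coded young-bit handling (the removed lines with the CONFIG_MMU_NOTIFIER ifdefs) in favour of ptep_clear_young_notify()/pmdp_clear_young_notify(). A hedged sketch of the combined logic those helpers provide, assuming the usual mmu_notifier.h stubs so the notifier call degrades to a no-op when CONFIG_MMU_NOTIFIER is disabled:

#include <linux/mm.h>
#include <linux/mmu_notifier.h>

/* Equivalent of the removed open-coded sequence, shown for the PTE case. */
static inline bool damon_ptep_young_sketch(struct vm_area_struct *vma,
					   unsigned long addr, pte_t *pte)
{
	bool young = ptep_test_and_clear_young(vma, addr, pte);

	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
	return young;
}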

View File

@ -8,10 +8,8 @@
#define pr_fmt(fmt) "damon-reclaim: " fmt
#include <linux/damon.h>
#include <linux/ioport.h>
#include <linux/kstrtox.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include "modules-common.h"
@ -183,38 +181,31 @@ static int damon_reclaim_turn(bool on)
return 0;
}
static struct delayed_work damon_reclaim_timer;
static void damon_reclaim_timer_fn(struct work_struct *work)
{
static bool last_enabled;
bool now_enabled;
now_enabled = enabled;
if (last_enabled != now_enabled) {
if (!damon_reclaim_turn(now_enabled))
last_enabled = now_enabled;
else
enabled = last_enabled;
}
}
static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn);
static bool damon_reclaim_initialized;
static int damon_reclaim_enabled_store(const char *val,
const struct kernel_param *kp)
{
int rc = param_set_bool(val, kp);
bool is_enabled = enabled;
bool enable;
int err;
if (rc < 0)
return rc;
err = kstrtobool(val, &enable);
if (err)
return err;
/* system_wq might not be initialized yet */
if (!damon_reclaim_initialized)
return rc;
schedule_delayed_work(&damon_reclaim_timer, 0);
if (is_enabled == enable)
return 0;
/* Called before init function. The function will handle this. */
if (!ctx)
goto set_param_out;
err = damon_reclaim_turn(enable);
if (err)
return err;
set_param_out:
enabled = enable;
return err;
}
static const struct kernel_param_ops enabled_param_ops = {
@ -256,29 +247,19 @@ static int damon_reclaim_after_wmarks_check(struct damon_ctx *c)
static int __init damon_reclaim_init(void)
{
ctx = damon_new_ctx();
if (!ctx)
return -ENOMEM;
int err = damon_modules_new_paddr_ctx_target(&ctx, &target);
if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
damon_destroy_ctx(ctx);
return -EINVAL;
}
if (err)
return err;
ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check;
ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
target = damon_new_target();
if (!target) {
damon_destroy_ctx(ctx);
return -ENOMEM;
}
damon_add_target(ctx, target);
/* 'enabled' has been set before this function, probably via command line */
if (enabled)
err = damon_reclaim_turn(true);
schedule_delayed_work(&damon_reclaim_timer, 0);
damon_reclaim_initialized = true;
return 0;
return err;
}
module_init(damon_reclaim_init);

mm/damon/sysfs-common.c (new file, 107 lines)
View File

@ -0,0 +1,107 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Common Primitives for DAMON Sysfs Interface
*
* Author: SeongJae Park <sj@kernel.org>
*/
#include <linux/slab.h>
#include "sysfs-common.h"
DEFINE_MUTEX(damon_sysfs_lock);
/*
* unsigned long range directory
*/
struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
unsigned long min,
unsigned long max)
{
struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range),
GFP_KERNEL);
if (!range)
return NULL;
range->kobj = (struct kobject){};
range->min = min;
range->max = max;
return range;
}
static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
struct damon_sysfs_ul_range *range = container_of(kobj,
struct damon_sysfs_ul_range, kobj);
return sysfs_emit(buf, "%lu\n", range->min);
}
static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct damon_sysfs_ul_range *range = container_of(kobj,
struct damon_sysfs_ul_range, kobj);
unsigned long min;
int err;
err = kstrtoul(buf, 0, &min);
if (err)
return err;
range->min = min;
return count;
}
static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
struct damon_sysfs_ul_range *range = container_of(kobj,
struct damon_sysfs_ul_range, kobj);
return sysfs_emit(buf, "%lu\n", range->max);
}
static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct damon_sysfs_ul_range *range = container_of(kobj,
struct damon_sysfs_ul_range, kobj);
unsigned long max;
int err;
err = kstrtoul(buf, 0, &max);
if (err)
return err;
range->max = max;
return count;
}
void damon_sysfs_ul_range_release(struct kobject *kobj)
{
kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj));
}
static struct kobj_attribute damon_sysfs_ul_range_min_attr =
__ATTR_RW_MODE(min, 0600);
static struct kobj_attribute damon_sysfs_ul_range_max_attr =
__ATTR_RW_MODE(max, 0600);
static struct attribute *damon_sysfs_ul_range_attrs[] = {
&damon_sysfs_ul_range_min_attr.attr,
&damon_sysfs_ul_range_max_attr.attr,
NULL,
};
ATTRIBUTE_GROUPS(damon_sysfs_ul_range);
struct kobj_type damon_sysfs_ul_range_ktype = {
.release = damon_sysfs_ul_range_release,
.sysfs_ops = &kobj_sysfs_ops,
.default_groups = damon_sysfs_ul_range_groups,
};
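A hedged sketch of how a parent DAMON sysfs directory would typically hook one of these range directories up (the parent kobject and the "nr_accesses" directory name are assumptions for illustration, not part of this patch):

#include <linux/kobject.h>

#include "sysfs-common.h"

/* Illustrative only: allocate a range dir and add it under an existing parent. */
static int damon_sysfs_add_ul_range_dir(struct kobject *parent)
{
	struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0);
	int err;

	if (!range)
		return -ENOMEM;
	err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype,
			parent, "nr_accesses");
	if (err)
		kobject_put(&range->kobj);	/* ->release() kfrees the range */
	return err;
}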

mm/damon/sysfs-common.h (new file, 58 lines)
View File

@ -0,0 +1,58 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Common Primitives for DAMON Sysfs Interface
*
* Author: SeongJae Park <sj@kernel.org>
*/
#include <linux/damon.h>
#include <linux/kobject.h>
extern struct mutex damon_sysfs_lock;
struct damon_sysfs_ul_range {
struct kobject kobj;
unsigned long min;
unsigned long max;
};
struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
unsigned long min,
unsigned long max);
void damon_sysfs_ul_range_release(struct kobject *kobj);
extern struct kobj_type damon_sysfs_ul_range_ktype;
/*
* schemes directory
*/
struct damon_sysfs_schemes {
struct kobject kobj;
struct damon_sysfs_scheme **schemes_arr;
int nr;
};
struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void);
void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes);
extern struct kobj_type damon_sysfs_schemes_ktype;
int damon_sysfs_set_schemes(struct damon_ctx *ctx,
struct damon_sysfs_schemes *sysfs_schemes);
void damon_sysfs_schemes_update_stats(
struct damon_sysfs_schemes *sysfs_schemes,
struct damon_ctx *ctx);
int damon_sysfs_schemes_update_regions_start(
struct damon_sysfs_schemes *sysfs_schemes,
struct damon_ctx *ctx, bool total_bytes_only);
bool damos_sysfs_regions_upd_done(void);
int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx);
int damon_sysfs_schemes_clear_regions(
struct damon_sysfs_schemes *sysfs_schemes,
struct damon_ctx *ctx);

mm/damon/sysfs-schemes.c (new file, 1458 lines)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2967,7 +2967,7 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
/*
* NOTE! This will make us return with VM_FAULT_RETRY, but with
* the mmap_lock still held. That's how FAULT_FLAG_RETRY_NOWAIT
* the fault lock still held. That's how FAULT_FLAG_RETRY_NOWAIT
* is supposed to work. We have way too many special cases..
*/
if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
@ -2977,13 +2977,14 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
if (vmf->flags & FAULT_FLAG_KILLABLE) {
if (__folio_lock_killable(folio)) {
/*
* We didn't have the right flags to drop the mmap_lock,
* but all fault_handlers only check for fatal signals
* if we return VM_FAULT_RETRY, so we need to drop the
* mmap_lock here and return 0 if we don't have a fpin.
* We didn't have the right flags to drop the
* fault lock, but all fault_handlers only check
* for fatal signals if we return VM_FAULT_RETRY,
* so we need to drop the fault lock here and
* return 0 if we don't have a fpin.
*/
if (*fpin == NULL)
mmap_read_unlock(vmf->vma->vm_mm);
release_fault_lock(vmf);
return 0;
}
} else

View File

@ -411,6 +411,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
* be 0. This will underflow and is okay.
*/
next = mas_find(&mas, ceiling - 1);
if (unlikely(xa_is_zero(next)))
next = NULL;
/*
* Hide vma from rmap and truncate_pagecache before freeing
@ -432,6 +434,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
&& !is_vm_hugetlb_page(next)) {
vma = next;
next = mas_find(&mas, ceiling - 1);
if (unlikely(xa_is_zero(next)))
next = NULL;
if (mm_wr_locked)
vma_start_write(vma);
unlink_anon_vmas(vma);
@ -1736,7 +1740,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
do {
unmap_single_vma(tlb, vma, start_addr, end_addr, &details,
mm_wr_locked);
} while ((vma = mas_find(&mas, end_t - 1)) != NULL);
vma = mas_find(&mas, end_t - 1);
} while (vma && likely(!xa_is_zero(vma)));
mmu_notifier_invalidate_range_end(&range);
}
@ -3099,6 +3104,36 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
count_vm_event(PGREUSE);
}
/*
* We could add a bitflag somewhere, but for now, we know that all
* vm_ops that have a ->map_pages have been audited and don't need
* the mmap_lock to be held.
*/
static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
if (vma->vm_ops->map_pages || !(vmf->flags & FAULT_FLAG_VMA_LOCK))
return 0;
vma_end_read(vma);
return VM_FAULT_RETRY;
}
static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
if (likely(vma->anon_vma))
return 0;
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
vma_end_read(vma);
return VM_FAULT_RETRY;
}
if (__anon_vma_prepare(vma))
return VM_FAULT_OOM;
return 0;
}
/*
* Handle the case of a page which we actually need to copy to a new page,
* either due to COW or unsharing.
@ -3126,12 +3161,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
pte_t entry;
int page_copied = 0;
struct mmu_notifier_range range;
int ret;
vm_fault_t ret;
delayacct_wpcopy_start();
if (unlikely(anon_vma_prepare(vma)))
goto oom;
ret = vmf_anon_prepare(vmf);
if (unlikely(ret))
goto out;
if (is_zero_pfn(pte_pfn(vmf->orig_pte))) {
new_page = alloc_zeroed_user_highpage_movable(vma,
@ -3139,13 +3175,14 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
if (!new_page)
goto oom;
} else {
int err;
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
vmf->address);
if (!new_page)
goto oom;
ret = __wp_page_copy_user(new_page, old_page, vmf);
if (ret) {
err = __wp_page_copy_user(new_page, old_page, vmf);
if (err) {
/*
* COW failed, if the fault was solved by other,
* it's fine. If not, userspace would re-fault on
@ -3158,7 +3195,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
put_page(old_page);
delayacct_wpcopy_end();
return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
}
kmsan_copy_page_meta(new_page, old_page);
}
@ -3271,11 +3308,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
oom_free_new:
put_page(new_page);
oom:
ret = VM_FAULT_OOM;
out:
if (old_page)
put_page(old_page);
delayacct_wpcopy_end();
return VM_FAULT_OOM;
return ret;
}
/**
@ -3324,10 +3363,9 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
vm_fault_t ret;
pte_unmap_unlock(vmf->pte, vmf->ptl);
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
vma_end_read(vmf->vma);
return VM_FAULT_RETRY;
}
ret = vmf_can_call_fault(vmf);
if (ret)
return ret;
vmf->flags |= FAULT_FLAG_MKWRITE;
ret = vma->vm_ops->pfn_mkwrite(vmf);
@ -3351,10 +3389,10 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
vm_fault_t tmp;
pte_unmap_unlock(vmf->pte, vmf->ptl);
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
tmp = vmf_can_call_fault(vmf);
if (tmp) {
put_page(vmf->page);
vma_end_read(vmf->vma);
return VM_FAULT_RETRY;
return tmp;
}
tmp = do_page_mkwrite(vmf);
@ -3510,12 +3548,6 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
return wp_page_shared(vmf);
}
copy:
if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
vma_end_read(vmf->vma);
return VM_FAULT_RETRY;
}
/*
* Ok, we need to copy. Oh, well..
*/
@ -4623,10 +4655,9 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf)
return ret;
}
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
vma_end_read(vmf->vma);
return VM_FAULT_RETRY;
}
ret = vmf_can_call_fault(vmf);
if (ret)
return ret;
ret = __do_fault(vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@ -4644,13 +4675,11 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret;
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
vma_end_read(vma);
return VM_FAULT_RETRY;
}
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
ret = vmf_can_call_fault(vmf);
if (!ret)
ret = vmf_anon_prepare(vmf);
if (ret)
return ret;
vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
if (!vmf->cow_page)
@ -4688,10 +4717,9 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret, tmp;
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
vma_end_read(vma);
return VM_FAULT_RETRY;
}
ret = vmf_can_call_fault(vmf);
if (ret)
return ret;
ret = __do_fault(vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@ -5514,7 +5542,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
* concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA
* from its anon_vma.
*/
if (unlikely(!vma->anon_vma))
if (vma_is_anonymous(vma) && !vma->anon_vma)
goto inval_end_read;
/* Check since vm_start/vm_end might change before we lock the VMA */

View File

@ -3303,10 +3303,11 @@ void exit_mmap(struct mm_struct *mm)
arch_exit_mmap(mm);
vma = mas_find(&mas, ULONG_MAX);
if (!vma) {
if (!vma || unlikely(xa_is_zero(vma))) {
/* Can happen if dup_mmap() received an OOM */
mmap_read_unlock(mm);
return;
mmap_write_lock(mm);
goto destroy;
}
lru_add_drain();
@ -3339,11 +3340,13 @@ void exit_mmap(struct mm_struct *mm)
remove_vma(vma, true);
count++;
cond_resched();
} while ((vma = mas_find(&mas, ULONG_MAX)) != NULL);
vma = mas_find(&mas, ULONG_MAX);
} while (vma && likely(!xa_is_zero(vma)));
BUG_ON(count != mm->map_count);
trace_exit_mmap(mm);
destroy:
__mt_destroy(&mm->mm_mt);
mmap_write_unlock(mm);
vm_unacct_memory(nr_accounted);

View File

@ -420,7 +420,7 @@ static int dump_task(struct task_struct *p, void *arg)
* State information includes task's pid, uid, tgid, vm size, rss,
* pgtables_bytes, swapents, oom_score_adj value, and name.
*/
static void dump_tasks(struct oom_control *oc)
void dump_tasks(struct oom_control *oc)
{
pr_info("Tasks state (memory values in pages):\n");
pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
@ -436,6 +436,7 @@ static void dump_tasks(struct oom_control *oc)
rcu_read_unlock();
}
}
EXPORT_SYMBOL_GPL(dump_tasks);
static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
{

View File

@ -167,6 +167,7 @@ static void read_pages(struct readahead_control *rac)
psi_memstall_enter(&rac->_pflags);
blk_start_plug(&plug);
trace_android_vh_read_pages(rac);
if (aops->readahead) {
aops->readahead(rac);
/*

View File

@ -243,7 +243,7 @@ static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb)
goto frame_finish;
#endif
e = rcu_dereference(net->nf.hooks_bridge[NF_BR_PRE_ROUTING]);
e = rcu_dereference(get_nf_hooks_bridge(net)[NF_BR_PRE_ROUTING]);
if (!e)
goto frame_finish;

View File

@ -1016,7 +1016,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
unsigned int i;
int ret;
e = rcu_dereference(net->nf.hooks_bridge[hook]);
e = rcu_dereference(get_nf_hooks_bridge(net)[hook]);
if (!e)
return okfn(net, sk, skb);

View File

@ -1093,9 +1093,13 @@ void __init net_ns_init(void)
struct net_generic *ng;
#ifdef CONFIG_NET_NS
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
/* Allocate size for struct ext_net instead of struct net
* to fix a KMI issue when CONFIG_NETFILTER_FAMILY_BRIDGE
* is enabled
*/
net_cachep = kmem_cache_create("net_namespace", sizeof(struct ext_net),
SMP_CACHE_BYTES,
SLAB_PANIC|SLAB_ACCOUNT, NULL);
SLAB_PANIC | SLAB_ACCOUNT, NULL);
/* Create workqueue for cleanup */
netns_wq = create_singlethread_workqueue("netns");
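get_nf_hooks_bridge(), used by the bridge hunks above, together with the over-allocation here implies a wrapper that carries the bridge hook entries outside the ABI-frozen struct net. The actual struct ext_net definition lives elsewhere in this tree; the following is only a heavily hedged sketch of why sizeof(struct ext_net) must be used for the cache, under the assumption that struct net is embedded as the first member:

#include <net/net_namespace.h>
#include <linux/netfilter.h>

/* Hypothetical layout, NOT the real Android definition. */
struct ext_net_sketch {
	struct net net;		/* must stay first so struct net * users are unaffected */
	struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
};

/* Sketch of the accessor the bridge code calls as get_nf_hooks_bridge(). */
static inline struct nf_hook_entries __rcu **nf_hooks_bridge_sketch(struct net *net)
{
	return ((struct ext_net_sketch *)net)->hooks_bridge;
}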

Some files were not shown because too many files have changed in this diff.