b84ad15be5
-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmbCv24ACgkQONu9yGCS aT7lNRAAzP2lSCUHROaMTldoQdahqoWqwFSiMI9p32HYLTerpg1GHVsi1IUvD+pv zhmUG9w+ACbSbZ9337G61FeEDCIBzgqaIXLCtbK2Be9nWMa9I1ZtMSFUKoSmVJBw YbrI/UOscJmAf44G6DeMp+N+/S2o7INK463u51SYjufo/zhFF8KsYElm23p06kgn lTkkUAoo9mSVvEr64zbjwLrWyBWTlcvYH/xrkWeJWXl+hBv0K5Ig9IBm0sc0DSQR fErADzDLFkmD9pduZbMwbzUUzC8ST41KKjTgClaHQhSMeoLoWT8CJM5Swwds4XVE JkoClkqnj3+stYFpLFm9UUgZ12wu/9slzgRCN6fTraSNT8gE9F9BRJXFGL+3S5OO oHKZYEEPTZDsD3PihgufJ4Ft27+KpMUzAgQUmVH/y47wrVJ2pf4fCK8LKT0MbjBi pjZaDRCxwo1aORL3+jYJBVRecrNqQ0DhacYOKznhb2KKeaHojIwLaE6k/W/0Q8U5 1uMYv+NJ3LWDNzGcNUTCfNtuDELOpkp24Xc8RN0MK2iMMMyfjMpgKssjSBZtz0QW NH0UVpfiWKECKH+m03NeFnYdMuK8/VyM8vatkcemz0FfgJP2UazeiVwSujfS2r2S 0TtsCMPP3kgKa9mAnni7lQs4wkG+OTNDNZqbuDqFZ1rHUS2Usrg= =8i2e -----END PGP SIGNATURE----- Merge 5.10.224 into android12-5.10-lts Changes in 5.10.224 EDAC/skx_common: Add new ADXL components for 2-level memory EDAC, i10nm: make skx_common.o a separate module platform/chrome: cros_ec_debugfs: fix wrong EC message version hfsplus: fix to avoid false alarm of circular locking x86/of: Return consistent error type from x86_of_pci_irq_enable() x86/pci/intel_mid_pci: Fix PCIBIOS_* return code handling x86/pci/xen: Fix PCIBIOS_* return code handling x86/platform/iosf_mbi: Convert PCIBIOS_* return codes to errnos hwmon: (adt7475) Fix default duty on fan is disabled pwm: stm32: Always do lazy disabling hwmon: (max6697) Fix underflow when writing limit attributes hwmon: (max6697) Fix swapped temp{1,8} critical alarms arm64: dts: qcom: sdm845: add power-domain to UFS PHY soc: qcom: rpmh-rsc: Ensure irqs aren't disabled by rpmh_rsc_send_data() callers arm64: dts: qcom: msm8996: specify UFS core_clk frequencies soc: qcom: pdr: protect locator_addr with the main mutex soc: qcom: pdr: fix parsing of domains lists arm64: dts: rockchip: Increase VOP clk rate on RK3328 ARM: dts: imx6qdl-kontron-samx6i: move phy reset into phy-node ARM: dts: 
imx6qdl-kontron-samx6i: fix PHY reset ARM: dts: imx6qdl-kontron-samx6i: fix board reset ARM: dts: imx6qdl-kontron-samx6i: fix SPI0 chip selects ARM: dts: imx6qdl-kontron-samx6i: fix PCIe reset polarity arm64: dts: mediatek: mt8183-kukui: Drop bogus output-enable property arm64: dts: mediatek: mt7622: fix "emmc" pinctrl mux arm64: dts: amlogic: gx: correct hdmi clocks m68k: atari: Fix TT bootup freeze / unexpected (SCU) interrupt messages x86/xen: Convert comma to semicolon m68k: cmpxchg: Fix return value for default case in __arch_xchg() ARM: pxa: spitz: use gpio descriptors for audio ARM: spitz: fix GPIO assignment for backlight firmware: turris-mox-rwtm: Fix checking return value of wait_for_completion_timeout() firmware: turris-mox-rwtm: Initialize completion before mailbox wifi: brcmsmac: LCN PHY code is used for BCM4313 2G-only device selftests/bpf: Fix prog numbers in test_sockmap net: esp: cleanup esp_output_tail_tcp() in case of unsupported ESPINTCP net/smc: Allow SMC-D 1MB DMB allocations net/smc: set rmb's SG_MAX_SINGLE_ALLOC limitation only when CONFIG_ARCH_NO_SG_CHAIN is defined selftests/bpf: Check length of recv in test_sockmap lib: objagg: Fix general protection fault mlxsw: spectrum_acl_erp: Fix object nesting warning mlxsw: spectrum_acl_bloom_filter: Make mlxsw_sp_acl_bf_key_encode() more flexible mlxsw: spectrum_acl: Fix ACL scale regression and firmware errors ath11k: dp: stop rx pktlog before suspend wifi: ath11k: fix wrong handling of CCMP256 and GCMP ciphers wifi: cfg80211: fix typo in cfg80211_calculate_bitrate_he() wifi: cfg80211: handle 2x996 RU allocation in cfg80211_calculate_bitrate_he() net: fec: Refactor: #define magic constants net: fec: Fix FEC_ECR_EN1588 being cleared on link-down ipvs: Avoid unnecessary calls to skb_is_gso_sctp netfilter: nf_tables: rise cap on SELinux secmark context perf/x86/intel/pt: Fix pt_topa_entry_for_page() address calculation perf: Fix perf_aux_size() for greater-than 32-bit size perf: Prevent passing zero 
nr_pages to rb_alloc_aux() qed: Improve the stack space of filter_config() wifi: virt_wifi: avoid reporting connection success with wrong SSID gss_krb5: Fix the error handling path for crypto_sync_skcipher_setkey wifi: virt_wifi: don't use strlen() in const context selftests/bpf: Close fd in error path in drop_on_reuseport bpf: annotate BTF show functions with __printf bna: adjust 'name' buf size of bna_tcb and bna_ccb structures bpf: Eliminate remaining "make W=1" warnings in kernel/bpf/btf.o selftests: forwarding: devlink_lib: Wait for udev events after reloading xdp: fix invalid wait context of page_pool_destroy() drm/panel: boe-tv101wum-nl6: If prepare fails, disable GPIO before regulators drm/panel: boe-tv101wum-nl6: Check for errors on the NOP in prepare() media: dvb-usb: Fix unexpected infinite loop in dvb_usb_read_remote_control() media: imon: Fix race getting ictx->lock saa7134: Unchecked i2c_transfer function result fixed media: uvcvideo: Allow entity-defined get_info and get_cur media: uvcvideo: Override default flags media: renesas: vsp1: Fix _irqsave and _irq mix media: renesas: vsp1: Store RPF partition configuration per RPF instance leds: trigger: Unregister sysfs attributes before calling deactivate() perf report: Fix condition in sort__sym_cmp() drm/etnaviv: fix DMA direction handling for cached RW buffers drm/qxl: Add check for drm_cvt_mode Revert "leds: led-core: Fix refcount leak in of_led_get()" ext4: fix infinite loop when replaying fast_commit media: venus: flush all buffers in output plane streamoff mfd: omap-usb-tll: Use struct_size to allocate tll xprtrdma: Rename frwr_release_mr() xprtrdma: Fix rpcrdma_reqs_reset() SUNRPC: avoid soft lockup when transmitting UDP to reachable server. 
ext4: avoid writing unitialized memory to disk in EA inodes sparc64: Fix incorrect function signature and add prototype for prom_cif_init SUNRPC: Fixup gss_status tracepoint error output PCI: Fix resource double counting on remove & rescan coresight: Fix ref leak when of_coresight_parse_endpoint() fails Input: qt1050 - handle CHIP_ID reading error RDMA/mlx4: Fix truncated output warning in mad.c RDMA/mlx4: Fix truncated output warning in alias_GUID.c RDMA/rxe: Don't set BTH_ACK_MASK for UC or UD QPs ASoC: max98088: Check for clk_prepare_enable() error mtd: make mtd_test.c a separate module RDMA/device: Return error earlier if port in not valid Input: elan_i2c - do not leave interrupt disabled on suspend failure MIPS: Octeron: remove source file executable bit powerpc/xmon: Fix disassembly CPU feature checks macintosh/therm_windtunnel: fix module unload. RDMA/hns: Fix missing pagesize and alignment check in FRMR bnxt_re: Fix imm_data endianness netfilter: ctnetlink: use helper function to calculate expect ID net: dsa: mv88e6xxx: Limit chip-wide frame size config to CPU ports net: dsa: b53: Limit chip-wide jumbo frame config to CPU ports pinctrl: rockchip: update rk3308 iomux routes pinctrl: core: fix possible memory leak when pinctrl_enable() fails pinctrl: single: fix possible memory leak when pinctrl_enable() fails pinctrl: ti: ti-iodelay: Drop if block with always false condition pinctrl: ti: ti-iodelay: fix possible memory leak when pinctrl_enable() fails pinctrl: freescale: mxs: Fix refcount of child fs/proc/task_mmu: indicate PM_FILE for PMD-mapped file THP fs/nilfs2: remove some unused macros to tame gcc nilfs2: avoid undefined behavior in nilfs_cnt32_ge macro rtc: interface: Add RTC offset to alarm after fix-up dt-bindings: thermal: correct thermal zone node name limit tick/broadcast: Make takeover of broadcast hrtimer reliable net: netconsole: Disable target before netpoll cleanup af_packet: Handle outgoing VLAN packets without hardware offloading ipv6: 
take care of scope when choosing the src addr sched/fair: set_load_weight() must also call reweight_task() for SCHED_IDLE tasks char: tpm: Fix possible memory leak in tpm_bios_measurements_open() media: venus: fix use after free in vdec_close hfs: fix to initialize fields of hfs_inode_info after hfs_alloc_inode() ext2: Verify bitmap and itable block numbers before using them drm/gma500: fix null pointer dereference in cdv_intel_lvds_get_modes drm/gma500: fix null pointer dereference in psb_intel_lvds_get_modes scsi: qla2xxx: Fix optrom version displayed in FDMI drm/amd/display: Check for NULL pointer sched/fair: Use all little CPUs for CPU-bound workloads apparmor: use kvfree_sensitive to free data->data task_work: s/task_work_cancel()/task_work_cancel_func()/ task_work: Introduce task_work_cancel() again udf: Avoid using corrupted block bitmap buffer m68k: amiga: Turn off Warp1260 interrupts during boot ext4: check dot and dotdot of dx_root before making dir indexed ext4: make sure the first directory block is not a hole wifi: mwifiex: Fix interface type change leds: ss4200: Convert PCIBIOS_* return codes to errnos jbd2: make jbd2_journal_get_max_txn_bufs() internal KVM: VMX: Split out the non-virtualization part of vmx_interrupt_blocked() tools/memory-model: Fix bug in lock.cat hwrng: amd - Convert PCIBIOS_* return codes to errnos PCI: hv: Return zero, not garbage, when reading PCI_INTERRUPT_PIN PCI: rockchip: Use GPIOD_OUT_LOW flag while requesting ep_gpio binder: fix hang of unregistered readers dev/parport: fix the array out-of-bounds risk scsi: qla2xxx: Return ENOBUFS if sg_cnt is more than one for ELS cmds f2fs: fix to don't dirty inode for readonly filesystem clk: davinci: da8xx-cfgchip: Initialize clk_init_data before use ubi: eba: properly rollback inside self_check_eba decompress_bunzip2: fix rare decompression failure kbuild: Fix '-S -c' in x86 stack protector scripts kobject_uevent: Fix OOB access within zap_modalias_env() devres: Fix devm_krealloc() 
wasting memory rtc: cmos: Fix return value of nvmem callbacks scsi: qla2xxx: During vport delete send async logout explicitly scsi: qla2xxx: Fix for possible memory corruption scsi: qla2xxx: Fix flash read failure scsi: qla2xxx: Complete command early within lock scsi: qla2xxx: validate nvme_local_port correctly perf/x86/intel/pt: Fix topa_entry base length perf/x86/intel/pt: Fix a topa_entry base address calculation rtc: isl1208: Fix return value of nvmem callbacks watchdog/perf: properly initialize the turbo mode timestamp and rearm counter platform: mips: cpu_hwmon: Disable driver on unsupported hardware RDMA/iwcm: Fix a use-after-free related to destroying CM IDs selftests/sigaltstack: Fix ppc64 GCC build rbd: don't assume rbd_is_lock_owner() for exclusive mappings MIPS: ip30: ip30-console: Add missing include MIPS: Loongson64: env: Hook up Loongsson-2K drm/panfrost: Mark simple_ondemand governor as softdep rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings Bluetooth: btusb: Add RTL8852BE device 0489:e125 to device tables Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x13d3:0x3591 nilfs2: handle inconsistent state in nilfs_btnode_create_block() io_uring/io-wq: limit retrying worker initialisation kernel: rerun task_work while freezing in get_signal() kdb: address -Wformat-security warnings kdb: Use the passed prompt in kdb_position_cursor() jfs: Fix array-index-out-of-bounds in diFree um: time-travel: fix time-travel-start option f2fs: fix start segno of large section libbpf: Fix no-args func prototype BTF dumping syntax dma: fix call order in dmam_free_coherent MIPS: SMP-CPS: Fix address for GCR_ACCESS register for CM3 and later ipv4: Fix incorrect source address in Record Route option net: bonding: correctly annotate RCU in bond_should_notify_peers() netfilter: nft_set_pipapo_avx2: disable softinterrupts tipc: Return non-zero value from tipc_udp_addr2str() on error net: stmmac: Correct 
byte order of perfect_match net: nexthop: Initialize all fields in dumped nexthops bpf: Fix a segment issue when downgrading gso_size mISDN: Fix a use after free in hfcmulti_tx() apparmor: Fix null pointer deref when receiving skb during sock creation powerpc: fix a file leak in kvm_vcpu_ioctl_enable_cap() lirc: rc_dev_get_from_fd(): fix file leak ASoC: Intel: use soc_intel_is_byt_cr() only when IOSF_MBI is reachable ceph: fix incorrect kmalloc size of pagevec mempool nvme: split command copy into a helper nvme-pci: add missing condition check for existence of mapped data fs: don't allow non-init s_user_ns for filesystems without FS_USERNS_MOUNT powerpc/configs: Update defconfig with now user-visible CONFIG_FSL_IFC fuse: name fs_context consistently fuse: verify {g,u}id mount options correctly sysctl: always initialize i_uid/i_gid ext4: factor out a common helper to query extent map ext4: check the extent status again before inserting delalloc block soc: xilinx: move PM_INIT_FINALIZE to zynqmp_pm_domains driver drivers: soc: xilinx: check return status of get_api_version() driver core: Cast to (void *) with __force for __percpu pointer devres: Fix memory leakage caused by driver API devm_free_percpu() genirq: Allow the PM device to originate from irq domain irqchip/imx-irqsteer: Constify irq_chip struct irqchip/imx-irqsteer: Add runtime PM support irqchip/imx-irqsteer: Handle runtime power management correctly remoteproc: imx_rproc: ignore mapping vdev regions remoteproc: imx_rproc: Fix ignoring mapping vdev regions remoteproc: imx_rproc: Skip over memory region when node value is NULL drm/nouveau: prime: fix refcount underflow drm/vmwgfx: Fix overlay when using Screen Targets sched: act_ct: take care of padding in struct zones_ht_key net/iucv: fix use after free in iucv_sock_close() net/mlx5e: Add a check for the return value from mlx5_port_set_eth_ptys ipv6: fix ndisc_is_useropt() handling for PIO riscv/mm: Add handling for VM_FAULT_SIGSEGV in mm_fault_error() 
platform/chrome: cros_ec_proto: Lock device when updating MKBP version HID: wacom: Modify pen IDs protect the fetch of ->fd[fd] in do_dup2() from mispredictions ALSA: usb-audio: Correct surround channels in UAC1 channel map ALSA: hda/realtek: Add quirk for Acer Aspire E5-574G net: usb: sr9700: fix uninitialized variable use in sr_mdio_read r8169: don't increment tx_dropped in case of NETDEV_TX_BUSY mptcp: fix duplicate data handling netfilter: ipset: Add list flush to cancel_gc genirq: Allow irq_chip registration functions to take a const irq_chip irqchip/mbigen: Fix mbigen node address layout x86/mm: Fix pti_clone_pgtable() alignment assumption x86/mm: Fix pti_clone_entry_text() for i386 sctp: move hlist_node and hashent out of sctp_ep_common sctp: Fix null-ptr-deref in reuseport_add_sock(). net: usb: qmi_wwan: fix memory leak for not ip packets net: linkwatch: use system_unbound_wq Bluetooth: l2cap: always unlock channel in l2cap_conless_channel() net: dsa: bcm_sf2: Fix a possible memory leak in bcm_sf2_mdio_register() l2tp: fix lockdep splat net: fec: Stop PPS on driver remove rcutorture: Fix rcu_torture_fwd_cb_cr() data race md: do not delete safemode_timer in mddev_suspend md/raid5: avoid BUG_ON() while continue reshape after reassembling clocksource/drivers/sh_cmt: Address race condition for clock events ACPI: battery: create alarm sysfs attribute atomically ACPI: SBS: manage alarm sysfs attribute through psy core selftests/bpf: Fix send_signal test with nested CONFIG_PARAVIRT PCI: Add Edimax Vendor ID to pci_ids.h udf: prevent integer overflow in udf_bitmap_free_blocks() wifi: nl80211: don't give key data to userspace btrfs: fix bitmap leak when loading free space cache on duplicate entry drm/amdgpu: Fix the null pointer dereference to ras_manager drm/amdgpu/pm: Fix the null pointer dereference in apply_state_adjust_rules media: uvcvideo: Ignore empty TS packets media: uvcvideo: Fix the bandwdith quirk on USB 3.x jbd2: avoid memleak in 
jbd2_journal_write_metadata_buffer s390/sclp: Prevent release of buffer in I/O SUNRPC: Fix a race to wake a sync task sched/cputime: Fix mul_u64_u64_div_u64() precision for cputime ext4: fix wrong unit use in ext4_mb_find_by_goal arm64: cpufeature: Force HWCAP to be based on the sysreg visible to user-space arm64: Add Neoverse-V2 part arm64: cputype: Add Cortex-X4 definitions arm64: cputype: Add Neoverse-V3 definitions arm64: errata: Add workaround for Arm errata 3194386 and 3312417 arm64: cputype: Add Cortex-X3 definitions arm64: cputype: Add Cortex-A720 definitions arm64: cputype: Add Cortex-X925 definitions arm64: errata: Unify speculative SSBS errata logic arm64: errata: Expand speculative SSBS workaround arm64: cputype: Add Cortex-X1C definitions arm64: cputype: Add Cortex-A725 definitions arm64: errata: Expand speculative SSBS workaround (again) i2c: smbus: Improve handling of stuck alerts ASoC: codecs: wsa881x: Correct Soundwire ports mask i2c: smbus: Send alert notifications to all devices if source not found bpf: kprobe: remove unused declaring of bpf_kprobe_override kprobes: Fix to check symbol prefixes correctly spi: spi-fsl-lpspi: Fix scldiv calculation ALSA: usb-audio: Re-add ScratchAmp quirk entries drm/client: fix null pointer dereference in drm_client_modeset_probe ALSA: line6: Fix racy access to midibuf ALSA: hda: Add HP MP9 G4 Retail System AMS to force connect list ALSA: hda/hdmi: Yet more pin fix for HP EliteDesk 800 G4 usb: vhci-hcd: Do not drop references before new references are gained USB: serial: debug: do not echo input by default usb: gadget: core: Check for unset descriptor usb: gadget: u_serial: Set start_delayed during suspend scsi: ufs: core: Fix hba->last_dme_cmd_tstamp timestamp updating logic tick/broadcast: Move per CPU pointer access into the atomic section ntp: Clamp maxerror and esterror to operating range driver core: Fix uevent_show() vs driver detach race ntp: Safeguard against time_constant overflow scsi: mpt3sas: Remove 
scsi_dma_map() error messages scsi: mpt3sas: Avoid IOMMU page faults on REPORT ZONES irqchip/meson-gpio: support more than 8 channels gpio irq irqchip/meson-gpio: Convert meson_gpio_irq_controller::lock to 'raw_spinlock_t' serial: core: check uartclk for zero to avoid divide by zero irqchip/xilinx: Fix shift out of bounds genirq/irqdesc: Honor caller provided affinity in alloc_desc() power: supply: axp288_charger: Fix constant_charge_voltage writes power: supply: axp288_charger: Round constant_charge_voltage writes down tracing: Fix overflow in get_free_elt() padata: Fix possible divide-by-0 panic in padata_mt_helper() x86/mtrr: Check if fixed MTRRs exist before saving them drm/bridge: analogix_dp: properly handle zero sized AUX transactions drm/mgag200: Set DDC timeout in milliseconds mptcp: sched: check both directions for backup mptcp: distinguish rcv vs sent backup flag in requests mptcp: fix NL PM announced address accounting mptcp: mib: count MPJ with backup flag mptcp: export local_address mptcp: pm: fix backup support in signal endpoints samples: Add fs error monitoring example samples: Make fs-monitor depend on libc and headers Add gitignore file for samples/fanotify/ subdirectory Fix gcc 4.9 build issue in 5.10.y PCI/DPC: Fix use-after-free on concurrent DPC and hot-removal netfilter: nf_tables: set element extended ACK reporting support netfilter: nf_tables: use timestamp to check for set element timeout netfilter: nf_tables: allow clone callbacks to sleep netfilter: nf_tables: prefer nft_chain_validate drm/i915/gem: Fix Virtual Memory mapping boundaries calculation powerpc: Avoid nmi_enter/nmi_exit in real mode interrupt. 
arm64: cpufeature: Fix the visibility of compat hwcaps media: uvcvideo: Use entity get_cur in uvc_ctrl_set exec: Fix ToCToU between perm check and set-uid/gid usage nvme/pci: Add APST quirk for Lenovo N60z laptop vdpa: Make use of PFN_PHYS/PFN_UP/PFN_DOWN helper macro vhost-vdpa: switch to use vmf_insert_pfn() in the fault handler wifi: cfg80211: restrict NL80211_ATTR_TXQ_QUANTUM values ARM: dts: imx6qdl-kontron-samx6i: fix phy-mode media: Revert "media: dvb-usb: Fix unexpected infinite loop in dvb_usb_read_remote_control()" Linux 5.10.224 Change-Id: I7cd19d506c4c86df918a280598946060a494a161 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2034 lines
45 KiB
C
2034 lines
45 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/* Generic nexthop implementation
|
|
*
|
|
* Copyright (c) 2017-19 Cumulus Networks
|
|
* Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
|
|
*/
|
|
|
|
#include <linux/nexthop.h>
|
|
#include <linux/rtnetlink.h>
|
|
#include <linux/slab.h>
|
|
#include <net/arp.h>
|
|
#include <net/ipv6_stubs.h>
|
|
#include <net/lwtunnel.h>
|
|
#include <net/ndisc.h>
|
|
#include <net/nexthop.h>
|
|
#include <net/route.h>
|
|
#include <net/sock.h>
|
|
|
|
/* Forward declaration. */
static void remove_nexthop(struct net *net, struct nexthop *nh,
			   struct nl_info *nlinfo);

/* Width in bits of the value produced by nh_dev_hashfn(), and the
 * number of distinct values that width allows.
 */
#define NH_DEV_HASHBITS		8
#define NH_DEV_HASHSIZE		(1U << NH_DEV_HASHBITS)
|
|
|
|
static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
|
|
[NHA_ID] = { .type = NLA_U32 },
|
|
[NHA_GROUP] = { .type = NLA_BINARY },
|
|
[NHA_GROUP_TYPE] = { .type = NLA_U16 },
|
|
[NHA_BLACKHOLE] = { .type = NLA_FLAG },
|
|
[NHA_OIF] = { .type = NLA_U32 },
|
|
[NHA_GATEWAY] = { .type = NLA_BINARY },
|
|
[NHA_ENCAP_TYPE] = { .type = NLA_U16 },
|
|
[NHA_ENCAP] = { .type = NLA_NESTED },
|
|
[NHA_GROUPS] = { .type = NLA_FLAG },
|
|
[NHA_MASTER] = { .type = NLA_U32 },
|
|
[NHA_FDB] = { .type = NLA_FLAG },
|
|
};
|
|
|
|
static int call_nexthop_notifiers(struct net *net,
|
|
enum nexthop_event_type event_type,
|
|
struct nexthop *nh)
|
|
{
|
|
int err;
|
|
|
|
err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
|
|
event_type, nh);
|
|
return notifier_to_errno(err);
|
|
}
|
|
|
|
static unsigned int nh_dev_hashfn(unsigned int val)
|
|
{
|
|
unsigned int mask = NH_DEV_HASHSIZE - 1;
|
|
|
|
return (val ^
|
|
(val >> NH_DEV_HASHBITS) ^
|
|
(val >> (NH_DEV_HASHBITS * 2))) & mask;
|
|
}
|
|
|
|
static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
|
|
{
|
|
struct net_device *dev = nhi->fib_nhc.nhc_dev;
|
|
struct hlist_head *head;
|
|
unsigned int hash;
|
|
|
|
WARN_ON(!dev);
|
|
|
|
hash = nh_dev_hashfn(dev->ifindex);
|
|
head = &net->nexthop.devhash[hash];
|
|
hlist_add_head(&nhi->dev_hash, head);
|
|
}
|
|
|
|
static void nexthop_free_mpath(struct nexthop *nh)
|
|
{
|
|
struct nh_group *nhg;
|
|
int i;
|
|
|
|
nhg = rcu_dereference_raw(nh->nh_grp);
|
|
for (i = 0; i < nhg->num_nh; ++i) {
|
|
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
|
|
|
|
WARN_ON(!list_empty(&nhge->nh_list));
|
|
nexthop_put(nhge->nh);
|
|
}
|
|
|
|
WARN_ON(nhg->spare == nhg);
|
|
|
|
kfree(nhg->spare);
|
|
kfree(nhg);
|
|
}
|
|
|
|
static void nexthop_free_single(struct nexthop *nh)
|
|
{
|
|
struct nh_info *nhi;
|
|
|
|
nhi = rcu_dereference_raw(nh->nh_info);
|
|
switch (nhi->family) {
|
|
case AF_INET:
|
|
fib_nh_release(nh->net, &nhi->fib_nh);
|
|
break;
|
|
case AF_INET6:
|
|
ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
|
|
break;
|
|
}
|
|
kfree(nhi);
|
|
}
|
|
|
|
void nexthop_free_rcu(struct rcu_head *head)
|
|
{
|
|
struct nexthop *nh = container_of(head, struct nexthop, rcu);
|
|
|
|
if (nh->is_group)
|
|
nexthop_free_mpath(nh);
|
|
else
|
|
nexthop_free_single(nh);
|
|
|
|
kfree(nh);
|
|
}
|
|
EXPORT_SYMBOL_GPL(nexthop_free_rcu);
|
|
|
|
static struct nexthop *nexthop_alloc(void)
|
|
{
|
|
struct nexthop *nh;
|
|
|
|
nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
|
|
if (nh) {
|
|
INIT_LIST_HEAD(&nh->fi_list);
|
|
INIT_LIST_HEAD(&nh->f6i_list);
|
|
INIT_LIST_HEAD(&nh->grp_list);
|
|
INIT_LIST_HEAD(&nh->fdb_list);
|
|
}
|
|
return nh;
|
|
}
|
|
|
|
/* Allocate a zeroed nh_group with room for @num_nh trailing entries and
 * record that count in it. Returns NULL if the allocation fails.
 */
static struct nh_group *nexthop_grp_alloc(u16 num_nh)
{
	struct nh_group *nhg;

	nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL);
	if (!nhg)
		return NULL;

	nhg->num_nh = num_nh;
	return nhg;
}
|
|
|
|
static void nh_base_seq_inc(struct net *net)
|
|
{
|
|
while (++net->nexthop.seq == 0)
|
|
;
|
|
}
|
|
|
|
/* no reference taken; rcu lock or rtnl must be held */
|
|
struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
|
|
{
|
|
struct rb_node **pp, *parent = NULL, *next;
|
|
|
|
pp = &net->nexthop.rb_root.rb_node;
|
|
while (1) {
|
|
struct nexthop *nh;
|
|
|
|
next = rcu_dereference_raw(*pp);
|
|
if (!next)
|
|
break;
|
|
parent = next;
|
|
|
|
nh = rb_entry(parent, struct nexthop, rb_node);
|
|
if (id < nh->id)
|
|
pp = &next->rb_left;
|
|
else if (id > nh->id)
|
|
pp = &next->rb_right;
|
|
else
|
|
return nh;
|
|
}
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nexthop_find_by_id);
|
|
|
|
/* used for auto id allocation; called with rtnl held */
|
|
static u32 nh_find_unused_id(struct net *net)
|
|
{
|
|
u32 id_start = net->nexthop.last_id_allocated;
|
|
|
|
while (1) {
|
|
net->nexthop.last_id_allocated++;
|
|
if (net->nexthop.last_id_allocated == id_start)
|
|
break;
|
|
|
|
if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
|
|
return net->nexthop.last_id_allocated;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
|
|
{
|
|
struct nexthop_grp *p;
|
|
size_t len = nhg->num_nh * sizeof(*p);
|
|
struct nlattr *nla;
|
|
u16 group_type = 0;
|
|
int i;
|
|
|
|
if (nhg->mpath)
|
|
group_type = NEXTHOP_GRP_TYPE_MPATH;
|
|
|
|
if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
|
|
goto nla_put_failure;
|
|
|
|
nla = nla_reserve(skb, NHA_GROUP, len);
|
|
if (!nla)
|
|
goto nla_put_failure;
|
|
|
|
p = nla_data(nla);
|
|
for (i = 0; i < nhg->num_nh; ++i) {
|
|
*p++ = (struct nexthop_grp) {
|
|
.id = nhg->nh_entries[i].nh->id,
|
|
.weight = nhg->nh_entries[i].weight - 1,
|
|
};
|
|
}
|
|
|
|
return 0;
|
|
|
|
nla_put_failure:
|
|
return -EMSGSIZE;
|
|
}
|
|
|
|
/* Build one netlink message describing @nh in @skb.
 *
 * The message is a struct nhmsg header followed by NHA_ID and then,
 * depending on the kind of nexthop:
 *   - group:     optional NHA_FDB, then the group attributes;
 *   - blackhole: NHA_BLACKHOLE only;
 *   - otherwise: NHA_FDB or NHA_OIF, an optional NHA_GATEWAY and
 *                optional encap attributes.
 *
 * Returns 0 on success. On lack of space the partially built message is
 * cancelled and -EMSGSIZE is returned.
 */
static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
			int event, u32 portid, u32 seq, unsigned int nlflags)
{
	struct nlmsghdr *nlh;
	struct nh_info *nhi;
	struct nhmsg *nhm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
	if (!nlh)
		return -EMSGSIZE;

	nhm = nlmsg_data(nlh);
	nhm->nh_family = AF_UNSPEC;
	nhm->nh_flags = nh->nh_flags;
	nhm->nh_protocol = nh->protocol;
	nhm->nh_scope = 0;
	nhm->resvd = 0;

	if (nla_put_u32(skb, NHA_ID, nh->id))
		goto cancel;

	if (nh->is_group) {
		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);

		if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
			goto cancel;
		if (nla_put_nh_group(skb, nhg))
			goto cancel;
		goto done;
	}

	nhi = rtnl_dereference(nh->nh_info);
	nhm->nh_family = nhi->family;

	/* a blackhole nexthop carries nothing beyond the flag itself */
	if (nhi->reject_nh) {
		if (nla_put_flag(skb, NHA_BLACKHOLE))
			goto cancel;
		goto done;
	}

	if (nhi->fdb_nh) {
		if (nla_put_flag(skb, NHA_FDB))
			goto cancel;
	} else {
		const struct net_device *dev = nhi->fib_nhc.nhc_dev;

		if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
			goto cancel;
	}

	nhm->nh_scope = nhi->fib_nhc.nhc_scope;

	/* gateway, only when one is configured for the family */
	if (nhi->family == AF_INET) {
		struct fib_nh *fib_nh = &nhi->fib_nh;

		if (fib_nh->fib_nh_gw_family &&
		    nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
			goto cancel;
	} else if (nhi->family == AF_INET6) {
		struct fib6_nh *fib6_nh = &nhi->fib6_nh;

		if (fib6_nh->fib_nh_gw_family &&
		    nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
			goto cancel;
	}

	if (nhi->fib_nhc.nhc_lwtstate &&
	    lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
				NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
		goto cancel;

done:
	nlmsg_end(skb, nlh);
	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
|
|
|
|
static size_t nh_nlmsg_size_grp(struct nexthop *nh)
|
|
{
|
|
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
|
|
size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
|
|
|
|
return nla_total_size(sz) +
|
|
nla_total_size(2); /* NHA_GROUP_TYPE */
|
|
}
|
|
|
|
static size_t nh_nlmsg_size_single(struct nexthop *nh)
|
|
{
|
|
struct nh_info *nhi = rtnl_dereference(nh->nh_info);
|
|
size_t sz;
|
|
|
|
/* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
|
|
* are mutually exclusive
|
|
*/
|
|
sz = nla_total_size(4); /* NHA_OIF */
|
|
|
|
switch (nhi->family) {
|
|
case AF_INET:
|
|
if (nhi->fib_nh.fib_nh_gw_family)
|
|
sz += nla_total_size(4); /* NHA_GATEWAY */
|
|
break;
|
|
|
|
case AF_INET6:
|
|
/* NHA_GATEWAY */
|
|
if (nhi->fib6_nh.fib_nh_gw_family)
|
|
sz += nla_total_size(sizeof(const struct in6_addr));
|
|
break;
|
|
}
|
|
|
|
if (nhi->fib_nhc.nhc_lwtstate) {
|
|
sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
|
|
sz += nla_total_size(2); /* NHA_ENCAP_TYPE */
|
|
}
|
|
|
|
return sz;
|
|
}
|
|
|
|
static size_t nh_nlmsg_size(struct nexthop *nh)
|
|
{
|
|
size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg));
|
|
|
|
sz += nla_total_size(4); /* NHA_ID */
|
|
|
|
if (nh->is_group)
|
|
sz += nh_nlmsg_size_grp(nh);
|
|
else
|
|
sz += nh_nlmsg_size_single(nh);
|
|
|
|
return sz;
|
|
}
|
|
|
|
static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
|
|
{
|
|
unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
|
|
u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
|
|
struct sk_buff *skb;
|
|
int err = -ENOBUFS;
|
|
|
|
skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
|
|
if (!skb)
|
|
goto errout;
|
|
|
|
err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
|
|
if (err < 0) {
|
|
/* -EMSGSIZE implies BUG in nh_nlmsg_size() */
|
|
WARN_ON(err == -EMSGSIZE);
|
|
kfree_skb(skb);
|
|
goto errout;
|
|
}
|
|
|
|
rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
|
|
info->nlh, gfp_any());
|
|
return;
|
|
errout:
|
|
if (err < 0)
|
|
rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
|
|
}
|
|
|
|
/* Check whether @nh may be a member of a group with @npaths entries.
 *
 * Rejects multipath groups (no nesting) and blackhole nexthops in
 * groups of more than one path, setting an extack message in each
 * case. On success, *@is_fdb reports whether @nh is an fdb nexthop.
 */
static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
			   bool *is_fdb, struct netlink_ext_ack *extack)
{
	struct nh_info *nhi;

	if (nh->is_group) {
		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);

		/* a group inside a group (nested multipath) is not
		 * supported
		 */
		if (nhg->mpath) {
			NL_SET_ERR_MSG(extack,
				       "Multipath group can not be a nexthop within a group");
			return false;
		}

		*is_fdb = nhg->fdb_nh;
		return true;
	}

	nhi = rtnl_dereference(nh->nh_info);
	if (npaths > 1 && nhi->reject_nh) {
		NL_SET_ERR_MSG(extack,
			       "Blackhole nexthop can not be used in a group with more than 1 path");
		return false;
	}

	*is_fdb = nhi->fdb_nh;
	return true;
}
|
|
|
|
/* Check that @nh may be a member of an fdb group: it must be an fdb
 * nexthop, and its family must match the family seen so far.
 *
 * @nh_family carries that family between calls; while it is AF_UNSPEC
 * the family of @nh is stored into it. Returns 0 if acceptable,
 * -EINVAL (with an extack message) otherwise.
 */
static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
				   struct netlink_ext_ack *extack)
{
	struct nh_info *nhi = rtnl_dereference(nh->nh_info);

	if (!nhi->fdb_nh) {
		NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
		return -EINVAL;
	}

	/* first member seen: it decides the family of the group */
	if (*nh_family == AF_UNSPEC) {
		*nh_family = nhi->family;
		return 0;
	}

	if (*nh_family != nhi->family) {
		NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
		return -EINVAL;
	}

	return 0;
}
|
|
|
|
static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
unsigned int len = nla_len(tb[NHA_GROUP]);
|
|
u8 nh_family = AF_UNSPEC;
|
|
struct nexthop_grp *nhg;
|
|
unsigned int i, j;
|
|
u8 nhg_fdb = 0;
|
|
|
|
if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
|
|
NL_SET_ERR_MSG(extack,
|
|
"Invalid length for nexthop group attribute");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* convert len to number of nexthop ids */
|
|
len /= sizeof(*nhg);
|
|
|
|
nhg = nla_data(tb[NHA_GROUP]);
|
|
for (i = 0; i < len; ++i) {
|
|
if (nhg[i].resvd1 || nhg[i].resvd2) {
|
|
NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
|
|
return -EINVAL;
|
|
}
|
|
if (nhg[i].weight > 254) {
|
|
NL_SET_ERR_MSG(extack, "Invalid value for weight");
|
|
return -EINVAL;
|
|
}
|
|
for (j = i + 1; j < len; ++j) {
|
|
if (nhg[i].id == nhg[j].id) {
|
|
NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (tb[NHA_FDB])
|
|
nhg_fdb = 1;
|
|
nhg = nla_data(tb[NHA_GROUP]);
|
|
for (i = 0; i < len; ++i) {
|
|
struct nexthop *nh;
|
|
bool is_fdb_nh;
|
|
|
|
nh = nexthop_find_by_id(net, nhg[i].id);
|
|
if (!nh) {
|
|
NL_SET_ERR_MSG(extack, "Invalid nexthop id");
|
|
return -EINVAL;
|
|
}
|
|
if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
|
|
return -EINVAL;
|
|
|
|
if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
|
|
return -EINVAL;
|
|
|
|
if (!nhg_fdb && is_fdb_nh) {
|
|
NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) {
|
|
if (!tb[i])
|
|
continue;
|
|
if (i == NHA_FDB)
|
|
continue;
|
|
NL_SET_ERR_MSG(extack,
|
|
"No other attributes can be set in nexthop groups");
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static bool ipv6_good_nh(const struct fib6_nh *nh)
|
|
{
|
|
int state = NUD_REACHABLE;
|
|
struct neighbour *n;
|
|
|
|
rcu_read_lock_bh();
|
|
|
|
n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
|
|
if (n)
|
|
state = n->nud_state;
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
return !!(state & NUD_VALID);
|
|
}
|
|
|
|
static bool ipv4_good_nh(const struct fib_nh *nh)
|
|
{
|
|
int state = NUD_REACHABLE;
|
|
struct neighbour *n;
|
|
|
|
rcu_read_lock_bh();
|
|
|
|
n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
|
|
(__force u32)nh->fib_nh_gw4);
|
|
if (n)
|
|
state = n->nud_state;
|
|
|
|
rcu_read_unlock_bh();
|
|
|
|
return !!(state & NUD_VALID);
|
|
}
|
|
|
|
struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
|
|
{
|
|
struct nexthop *rc = NULL;
|
|
struct nh_group *nhg;
|
|
int i;
|
|
|
|
if (!nh->is_group)
|
|
return nh;
|
|
|
|
nhg = rcu_dereference(nh->nh_grp);
|
|
for (i = 0; i < nhg->num_nh; ++i) {
|
|
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
|
|
struct nh_info *nhi;
|
|
|
|
if (hash > atomic_read(&nhge->upper_bound))
|
|
continue;
|
|
|
|
nhi = rcu_dereference(nhge->nh->nh_info);
|
|
if (nhi->fdb_nh)
|
|
return nhge->nh;
|
|
|
|
/* nexthops always check if it is good and does
|
|
* not rely on a sysctl for this behavior
|
|
*/
|
|
switch (nhi->family) {
|
|
case AF_INET:
|
|
if (ipv4_good_nh(&nhi->fib_nh))
|
|
return nhge->nh;
|
|
break;
|
|
case AF_INET6:
|
|
if (ipv6_good_nh(&nhi->fib6_nh))
|
|
return nhge->nh;
|
|
break;
|
|
}
|
|
|
|
if (!rc)
|
|
rc = nhge->nh;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nexthop_select_path);
|
|
|
|
int nexthop_for_each_fib6_nh(struct nexthop *nh,
|
|
int (*cb)(struct fib6_nh *nh, void *arg),
|
|
void *arg)
|
|
{
|
|
struct nh_info *nhi;
|
|
int err;
|
|
|
|
if (nh->is_group) {
|
|
struct nh_group *nhg;
|
|
int i;
|
|
|
|
nhg = rcu_dereference_rtnl(nh->nh_grp);
|
|
for (i = 0; i < nhg->num_nh; i++) {
|
|
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
|
|
|
|
nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
|
|
err = cb(&nhi->fib6_nh, arg);
|
|
if (err)
|
|
return err;
|
|
}
|
|
} else {
|
|
nhi = rcu_dereference_rtnl(nh->nh_info);
|
|
err = cb(&nhi->fib6_nh, arg);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
|
|
|
|
static int check_src_addr(const struct in6_addr *saddr,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
if (!ipv6_addr_any(saddr)) {
|
|
NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
|
|
return -EINVAL;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct nh_info *nhi;
|
|
bool is_fdb_nh;
|
|
|
|
/* fib6_src is unique to a fib6_info and limits the ability to cache
|
|
* routes in fib6_nh within a nexthop that is potentially shared
|
|
* across multiple fib entries. If the config wants to use source
|
|
* routing it can not use nexthop objects. mlxsw also does not allow
|
|
* fib6_src on routes.
|
|
*/
|
|
if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
|
|
return -EINVAL;
|
|
|
|
if (nh->is_group) {
|
|
struct nh_group *nhg;
|
|
|
|
nhg = rtnl_dereference(nh->nh_grp);
|
|
if (nhg->has_v4)
|
|
goto no_v4_nh;
|
|
is_fdb_nh = nhg->fdb_nh;
|
|
} else {
|
|
nhi = rtnl_dereference(nh->nh_info);
|
|
if (nhi->family == AF_INET)
|
|
goto no_v4_nh;
|
|
is_fdb_nh = nhi->fdb_nh;
|
|
}
|
|
|
|
if (is_fdb_nh) {
|
|
NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
no_v4_nh:
|
|
NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
|
|
return -EINVAL;
|
|
}
|
|
EXPORT_SYMBOL_GPL(fib6_check_nexthop);
|
|
|
|
/* if existing nexthop has ipv6 routes linked to it, need
|
|
* to verify this new spec works with ipv6
|
|
*/
|
|
static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct fib6_info *f6i;
|
|
|
|
if (list_empty(&old->f6i_list))
|
|
return 0;
|
|
|
|
list_for_each_entry(f6i, &old->f6i_list, nh_list) {
|
|
if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
|
|
return -EINVAL;
|
|
}
|
|
|
|
return fib6_check_nexthop(new, NULL, extack);
|
|
}
|
|
|
|
/* Check route @scope against the nexthop described by @nhi.
 *
 * Returns -EINVAL when a host-scope route would use a nexthop with a
 * gateway, or when an ONLINK nexthop is combined with a scope of
 * RT_SCOPE_LINK or higher; 0 otherwise.
 */
static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
			       struct netlink_ext_ack *extack)
{
	bool has_gw = nhi->fib_nhc.nhc_gw_family != 0;
	bool onlink = (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK) != 0;

	if (has_gw && scope == RT_SCOPE_HOST) {
		NL_SET_ERR_MSG(extack,
			       "Route with host scope can not have a gateway");
		return -EINVAL;
	}

	if (onlink && scope >= RT_SCOPE_LINK) {
		NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
		return -EINVAL;
	}

	return 0;
}
|
|
|
|
/* Invoked by fib add code to verify nexthop by id is ok with
|
|
* config for prefix; parts of fib_check_nh not done when nexthop
|
|
* object is used.
|
|
*/
|
|
int fib_check_nexthop(struct nexthop *nh, u8 scope,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct nh_info *nhi;
|
|
int err = 0;
|
|
|
|
if (nh->is_group) {
|
|
struct nh_group *nhg;
|
|
|
|
nhg = rtnl_dereference(nh->nh_grp);
|
|
if (nhg->fdb_nh) {
|
|
NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
|
|
err = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
if (scope == RT_SCOPE_HOST) {
|
|
NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
|
|
err = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
/* all nexthops in a group have the same scope */
|
|
nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
|
|
err = nexthop_check_scope(nhi, scope, extack);
|
|
} else {
|
|
nhi = rtnl_dereference(nh->nh_info);
|
|
if (nhi->fdb_nh) {
|
|
NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
|
|
err = -EINVAL;
|
|
goto out;
|
|
}
|
|
err = nexthop_check_scope(nhi, scope, extack);
|
|
}
|
|
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct fib_info *fi;
|
|
|
|
list_for_each_entry(fi, &old->fi_list, nh_list) {
|
|
int err;
|
|
|
|
err = fib_check_nexthop(new, fi->fib_scope, extack);
|
|
if (err)
|
|
return err;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void nh_group_rebalance(struct nh_group *nhg)
|
|
{
|
|
int total = 0;
|
|
int w = 0;
|
|
int i;
|
|
|
|
for (i = 0; i < nhg->num_nh; ++i)
|
|
total += nhg->nh_entries[i].weight;
|
|
|
|
for (i = 0; i < nhg->num_nh; ++i) {
|
|
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
|
|
int upper_bound;
|
|
|
|
w += nhge->weight;
|
|
upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
|
|
atomic_set(&nhge->upper_bound, upper_bound);
|
|
}
|
|
}
|
|
|
|
static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
|
|
struct nl_info *nlinfo)
|
|
{
|
|
struct nh_grp_entry *nhges, *new_nhges;
|
|
struct nexthop *nhp = nhge->nh_parent;
|
|
struct nexthop *nh = nhge->nh;
|
|
struct nh_group *nhg, *newg;
|
|
int i, j;
|
|
|
|
WARN_ON(!nh);
|
|
|
|
nhg = rtnl_dereference(nhp->nh_grp);
|
|
newg = nhg->spare;
|
|
|
|
/* last entry, keep it visible and remove the parent */
|
|
if (nhg->num_nh == 1) {
|
|
remove_nexthop(net, nhp, nlinfo);
|
|
return;
|
|
}
|
|
|
|
newg->has_v4 = false;
|
|
newg->mpath = nhg->mpath;
|
|
newg->fdb_nh = nhg->fdb_nh;
|
|
newg->num_nh = nhg->num_nh;
|
|
|
|
/* copy old entries to new except the one getting removed */
|
|
nhges = nhg->nh_entries;
|
|
new_nhges = newg->nh_entries;
|
|
for (i = 0, j = 0; i < nhg->num_nh; ++i) {
|
|
struct nh_info *nhi;
|
|
|
|
/* current nexthop getting removed */
|
|
if (nhg->nh_entries[i].nh == nh) {
|
|
newg->num_nh--;
|
|
continue;
|
|
}
|
|
|
|
nhi = rtnl_dereference(nhges[i].nh->nh_info);
|
|
if (nhi->family == AF_INET)
|
|
newg->has_v4 = true;
|
|
|
|
list_del(&nhges[i].nh_list);
|
|
new_nhges[j].nh_parent = nhges[i].nh_parent;
|
|
new_nhges[j].nh = nhges[i].nh;
|
|
new_nhges[j].weight = nhges[i].weight;
|
|
list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list);
|
|
j++;
|
|
}
|
|
|
|
nh_group_rebalance(newg);
|
|
rcu_assign_pointer(nhp->nh_grp, newg);
|
|
|
|
list_del(&nhge->nh_list);
|
|
nexthop_put(nhge->nh);
|
|
|
|
if (nlinfo)
|
|
nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
|
|
}
|
|
|
|
static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
|
|
struct nl_info *nlinfo)
|
|
{
|
|
struct nh_grp_entry *nhge, *tmp;
|
|
|
|
list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
|
|
remove_nh_grp_entry(net, nhge, nlinfo);
|
|
|
|
/* make sure all see the newly published array before releasing rtnl */
|
|
synchronize_net();
|
|
}
|
|
|
|
static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
|
|
{
|
|
struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
|
|
int i, num_nh = nhg->num_nh;
|
|
|
|
for (i = 0; i < num_nh; ++i) {
|
|
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
|
|
|
|
if (WARN_ON(!nhge->nh))
|
|
continue;
|
|
|
|
list_del_init(&nhge->nh_list);
|
|
}
|
|
}
|
|
|
|
/* not called for nexthop replace */
|
|
static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
|
|
{
|
|
struct fib6_info *f6i, *tmp;
|
|
bool do_flush = false;
|
|
struct fib_info *fi;
|
|
|
|
list_for_each_entry(fi, &nh->fi_list, nh_list) {
|
|
fi->fib_flags |= RTNH_F_DEAD;
|
|
do_flush = true;
|
|
}
|
|
if (do_flush)
|
|
fib_flush(net);
|
|
|
|
/* ip6_del_rt removes the entry from this list hence the _safe */
|
|
list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
|
|
/* __ip6_del_rt does a release, so do a hold here */
|
|
fib6_info_hold(f6i);
|
|
ipv6_stub->ip6_del_rt(net, f6i,
|
|
!READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));
|
|
}
|
|
}
|
|
|
|
static void __remove_nexthop(struct net *net, struct nexthop *nh,
|
|
struct nl_info *nlinfo)
|
|
{
|
|
__remove_nexthop_fib(net, nh);
|
|
|
|
if (nh->is_group) {
|
|
remove_nexthop_group(nh, nlinfo);
|
|
} else {
|
|
struct nh_info *nhi;
|
|
|
|
nhi = rtnl_dereference(nh->nh_info);
|
|
if (nhi->fib_nhc.nhc_dev)
|
|
hlist_del(&nhi->dev_hash);
|
|
|
|
remove_nexthop_from_groups(net, nh, nlinfo);
|
|
}
|
|
}
|
|
|
|
static void remove_nexthop(struct net *net, struct nexthop *nh,
|
|
struct nl_info *nlinfo)
|
|
{
|
|
call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh);
|
|
|
|
/* remove from the tree */
|
|
rb_erase(&nh->rb_node, &net->nexthop.rb_root);
|
|
|
|
if (nlinfo)
|
|
nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
|
|
|
|
__remove_nexthop(net, nh, nlinfo);
|
|
nh_base_seq_inc(net);
|
|
|
|
nexthop_put(nh);
|
|
}
|
|
|
|
/* if any FIB entries reference this nexthop, any dst entries
|
|
* need to be regenerated
|
|
*/
|
|
static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
|
|
{
|
|
struct fib6_info *f6i;
|
|
|
|
if (!list_empty(&nh->fi_list))
|
|
rt_cache_flush(net);
|
|
|
|
list_for_each_entry(f6i, &nh->f6i_list, nh_list)
|
|
ipv6_stub->fib6_update_sernum(net, f6i);
|
|
}
|
|
|
|
static int replace_nexthop_grp(struct net *net, struct nexthop *old,
|
|
struct nexthop *new,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct nh_group *oldg, *newg;
|
|
int i;
|
|
|
|
if (!new->is_group) {
|
|
NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
|
|
return -EINVAL;
|
|
}
|
|
|
|
oldg = rtnl_dereference(old->nh_grp);
|
|
newg = rtnl_dereference(new->nh_grp);
|
|
|
|
/* update parents - used by nexthop code for cleanup */
|
|
for (i = 0; i < newg->num_nh; i++)
|
|
newg->nh_entries[i].nh_parent = old;
|
|
|
|
rcu_assign_pointer(old->nh_grp, newg);
|
|
|
|
for (i = 0; i < oldg->num_nh; i++)
|
|
oldg->nh_entries[i].nh_parent = new;
|
|
|
|
rcu_assign_pointer(new->nh_grp, oldg);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void nh_group_v4_update(struct nh_group *nhg)
|
|
{
|
|
struct nh_grp_entry *nhges;
|
|
bool has_v4 = false;
|
|
int i;
|
|
|
|
nhges = nhg->nh_entries;
|
|
for (i = 0; i < nhg->num_nh; i++) {
|
|
struct nh_info *nhi;
|
|
|
|
nhi = rtnl_dereference(nhges[i].nh->nh_info);
|
|
if (nhi->family == AF_INET)
|
|
has_v4 = true;
|
|
}
|
|
nhg->has_v4 = has_v4;
|
|
}
|
|
|
|
static int replace_nexthop_single(struct net *net, struct nexthop *old,
|
|
struct nexthop *new,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct nh_info *oldi, *newi;
|
|
|
|
if (new->is_group) {
|
|
NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
|
|
return -EINVAL;
|
|
}
|
|
|
|
oldi = rtnl_dereference(old->nh_info);
|
|
newi = rtnl_dereference(new->nh_info);
|
|
|
|
newi->nh_parent = old;
|
|
oldi->nh_parent = new;
|
|
|
|
old->protocol = new->protocol;
|
|
old->nh_flags = new->nh_flags;
|
|
|
|
rcu_assign_pointer(old->nh_info, newi);
|
|
rcu_assign_pointer(new->nh_info, oldi);
|
|
|
|
/* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
|
|
* update IPv4 indication in all the groups using the nexthop.
|
|
*/
|
|
if (oldi->family == AF_INET && newi->family == AF_INET6) {
|
|
struct nh_grp_entry *nhge;
|
|
|
|
list_for_each_entry(nhge, &old->grp_list, nh_list) {
|
|
struct nexthop *nhp = nhge->nh_parent;
|
|
struct nh_group *nhg;
|
|
|
|
nhg = rtnl_dereference(nhp->nh_grp);
|
|
nh_group_v4_update(nhg);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
|
|
struct nl_info *info)
|
|
{
|
|
struct fib6_info *f6i;
|
|
|
|
if (!list_empty(&nh->fi_list)) {
|
|
struct fib_info *fi;
|
|
|
|
/* expectation is a few fib_info per nexthop and then
|
|
* a lot of routes per fib_info. So mark the fib_info
|
|
* and then walk the fib tables once
|
|
*/
|
|
list_for_each_entry(fi, &nh->fi_list, nh_list)
|
|
fi->nh_updated = true;
|
|
|
|
fib_info_notify_update(net, info);
|
|
|
|
list_for_each_entry(fi, &nh->fi_list, nh_list)
|
|
fi->nh_updated = false;
|
|
}
|
|
|
|
list_for_each_entry(f6i, &nh->f6i_list, nh_list)
|
|
ipv6_stub->fib6_rt_update(net, f6i, info);
|
|
}
|
|
|
|
/* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
|
|
* linked to this nexthop and for all groups that the nexthop
|
|
* is a member of
|
|
*/
|
|
static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
|
|
struct nl_info *info)
|
|
{
|
|
struct nh_grp_entry *nhge;
|
|
|
|
__nexthop_replace_notify(net, nh, info);
|
|
|
|
list_for_each_entry(nhge, &nh->grp_list, nh_list)
|
|
__nexthop_replace_notify(net, nhge->nh_parent, info);
|
|
}
|
|
|
|
static int replace_nexthop(struct net *net, struct nexthop *old,
|
|
struct nexthop *new, struct netlink_ext_ack *extack)
|
|
{
|
|
bool new_is_reject = false;
|
|
struct nh_grp_entry *nhge;
|
|
int err;
|
|
|
|
/* check that existing FIB entries are ok with the
|
|
* new nexthop definition
|
|
*/
|
|
err = fib_check_nh_list(old, new, extack);
|
|
if (err)
|
|
return err;
|
|
|
|
err = fib6_check_nh_list(old, new, extack);
|
|
if (err)
|
|
return err;
|
|
|
|
if (!new->is_group) {
|
|
struct nh_info *nhi = rtnl_dereference(new->nh_info);
|
|
|
|
new_is_reject = nhi->reject_nh;
|
|
}
|
|
|
|
list_for_each_entry(nhge, &old->grp_list, nh_list) {
|
|
/* if new nexthop is a blackhole, any groups using this
|
|
* nexthop cannot have more than 1 path
|
|
*/
|
|
if (new_is_reject &&
|
|
nexthop_num_path(nhge->nh_parent) > 1) {
|
|
NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
|
|
return -EINVAL;
|
|
}
|
|
|
|
err = fib_check_nh_list(nhge->nh_parent, new, extack);
|
|
if (err)
|
|
return err;
|
|
|
|
err = fib6_check_nh_list(nhge->nh_parent, new, extack);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
if (old->is_group)
|
|
err = replace_nexthop_grp(net, old, new, extack);
|
|
else
|
|
err = replace_nexthop_single(net, old, new, extack);
|
|
|
|
if (!err) {
|
|
nh_rt_cache_flush(net, old);
|
|
|
|
__remove_nexthop(net, new, NULL);
|
|
nexthop_put(new);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* called with rtnl_lock held */
|
|
static int insert_nexthop(struct net *net, struct nexthop *new_nh,
|
|
struct nh_config *cfg, struct netlink_ext_ack *extack)
|
|
{
|
|
struct rb_node **pp, *parent = NULL, *next;
|
|
struct rb_root *root = &net->nexthop.rb_root;
|
|
bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
|
|
bool create = !!(cfg->nlflags & NLM_F_CREATE);
|
|
u32 new_id = new_nh->id;
|
|
int replace_notify = 0;
|
|
int rc = -EEXIST;
|
|
|
|
pp = &root->rb_node;
|
|
while (1) {
|
|
struct nexthop *nh;
|
|
|
|
next = *pp;
|
|
if (!next)
|
|
break;
|
|
|
|
parent = next;
|
|
|
|
nh = rb_entry(parent, struct nexthop, rb_node);
|
|
if (new_id < nh->id) {
|
|
pp = &next->rb_left;
|
|
} else if (new_id > nh->id) {
|
|
pp = &next->rb_right;
|
|
} else if (replace) {
|
|
rc = replace_nexthop(net, nh, new_nh, extack);
|
|
if (!rc) {
|
|
new_nh = nh; /* send notification with old nh */
|
|
replace_notify = 1;
|
|
}
|
|
goto out;
|
|
} else {
|
|
/* id already exists and not a replace */
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if (replace && !create) {
|
|
NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
|
|
rc = -ENOENT;
|
|
goto out;
|
|
}
|
|
|
|
rb_link_node_rcu(&new_nh->rb_node, parent, pp);
|
|
rb_insert_color(&new_nh->rb_node, root);
|
|
rc = 0;
|
|
out:
|
|
if (!rc) {
|
|
nh_base_seq_inc(net);
|
|
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
|
|
if (replace_notify &&
|
|
READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode))
|
|
nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/* rtnl */
|
|
/* remove all nexthops tied to a device being deleted */
|
|
static void nexthop_flush_dev(struct net_device *dev, unsigned long event)
|
|
{
|
|
unsigned int hash = nh_dev_hashfn(dev->ifindex);
|
|
struct net *net = dev_net(dev);
|
|
struct hlist_head *head = &net->nexthop.devhash[hash];
|
|
struct hlist_node *n;
|
|
struct nh_info *nhi;
|
|
|
|
hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
|
|
if (nhi->fib_nhc.nhc_dev != dev)
|
|
continue;
|
|
|
|
if (nhi->reject_nh &&
|
|
(event == NETDEV_DOWN || event == NETDEV_CHANGE))
|
|
continue;
|
|
|
|
remove_nexthop(net, nhi->nh_parent, NULL);
|
|
}
|
|
}
|
|
|
|
/* rtnl; called when net namespace is deleted */
|
|
static void flush_all_nexthops(struct net *net)
|
|
{
|
|
struct rb_root *root = &net->nexthop.rb_root;
|
|
struct rb_node *node;
|
|
struct nexthop *nh;
|
|
|
|
while ((node = rb_first(root))) {
|
|
nh = rb_entry(node, struct nexthop, rb_node);
|
|
remove_nexthop(net, nh, NULL);
|
|
cond_resched();
|
|
}
|
|
}
|
|
|
|
static struct nexthop *nexthop_create_group(struct net *net,
|
|
struct nh_config *cfg)
|
|
{
|
|
struct nlattr *grps_attr = cfg->nh_grp;
|
|
struct nexthop_grp *entry = nla_data(grps_attr);
|
|
u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
|
|
struct nh_group *nhg;
|
|
struct nexthop *nh;
|
|
int i;
|
|
|
|
if (WARN_ON(!num_nh))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
nh = nexthop_alloc();
|
|
if (!nh)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
nh->is_group = 1;
|
|
|
|
nhg = nexthop_grp_alloc(num_nh);
|
|
if (!nhg) {
|
|
kfree(nh);
|
|
return ERR_PTR(-ENOMEM);
|
|
}
|
|
|
|
/* spare group used for removals */
|
|
nhg->spare = nexthop_grp_alloc(num_nh);
|
|
if (!nhg->spare) {
|
|
kfree(nhg);
|
|
kfree(nh);
|
|
return ERR_PTR(-ENOMEM);
|
|
}
|
|
nhg->spare->spare = nhg;
|
|
|
|
for (i = 0; i < nhg->num_nh; ++i) {
|
|
struct nexthop *nhe;
|
|
struct nh_info *nhi;
|
|
|
|
nhe = nexthop_find_by_id(net, entry[i].id);
|
|
if (!nexthop_get(nhe))
|
|
goto out_no_nh;
|
|
|
|
nhi = rtnl_dereference(nhe->nh_info);
|
|
if (nhi->family == AF_INET)
|
|
nhg->has_v4 = true;
|
|
|
|
nhg->nh_entries[i].nh = nhe;
|
|
nhg->nh_entries[i].weight = entry[i].weight + 1;
|
|
list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
|
|
nhg->nh_entries[i].nh_parent = nh;
|
|
}
|
|
|
|
if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
|
|
nhg->mpath = 1;
|
|
nh_group_rebalance(nhg);
|
|
}
|
|
|
|
if (cfg->nh_fdb)
|
|
nhg->fdb_nh = 1;
|
|
|
|
rcu_assign_pointer(nh->nh_grp, nhg);
|
|
|
|
return nh;
|
|
|
|
out_no_nh:
|
|
for (i--; i >= 0; --i) {
|
|
list_del(&nhg->nh_entries[i].nh_list);
|
|
nexthop_put(nhg->nh_entries[i].nh);
|
|
}
|
|
|
|
kfree(nhg->spare);
|
|
kfree(nhg);
|
|
kfree(nh);
|
|
|
|
return ERR_PTR(-ENOENT);
|
|
}
|
|
|
|
static int nh_create_ipv4(struct net *net, struct nexthop *nh,
|
|
struct nh_info *nhi, struct nh_config *cfg,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct fib_nh *fib_nh = &nhi->fib_nh;
|
|
struct fib_config fib_cfg = {
|
|
.fc_oif = cfg->nh_ifindex,
|
|
.fc_gw4 = cfg->gw.ipv4,
|
|
.fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
|
|
.fc_flags = cfg->nh_flags,
|
|
.fc_nlinfo = cfg->nlinfo,
|
|
.fc_encap = cfg->nh_encap,
|
|
.fc_encap_type = cfg->nh_encap_type,
|
|
};
|
|
u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
|
|
int err;
|
|
|
|
err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
|
|
if (err) {
|
|
fib_nh_release(net, fib_nh);
|
|
goto out;
|
|
}
|
|
|
|
if (nhi->fdb_nh)
|
|
goto out;
|
|
|
|
/* sets nh_dev if successful */
|
|
err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
|
|
if (!err) {
|
|
nh->nh_flags = fib_nh->fib_nh_flags;
|
|
fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
|
|
!fib_nh->fib_nh_scope ? 0 : fib_nh->fib_nh_scope - 1);
|
|
} else {
|
|
fib_nh_release(net, fib_nh);
|
|
}
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
static int nh_create_ipv6(struct net *net, struct nexthop *nh,
|
|
struct nh_info *nhi, struct nh_config *cfg,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct fib6_nh *fib6_nh = &nhi->fib6_nh;
|
|
struct fib6_config fib6_cfg = {
|
|
.fc_table = l3mdev_fib_table(cfg->dev),
|
|
.fc_ifindex = cfg->nh_ifindex,
|
|
.fc_gateway = cfg->gw.ipv6,
|
|
.fc_flags = cfg->nh_flags,
|
|
.fc_nlinfo = cfg->nlinfo,
|
|
.fc_encap = cfg->nh_encap,
|
|
.fc_encap_type = cfg->nh_encap_type,
|
|
.fc_is_fdb = cfg->nh_fdb,
|
|
};
|
|
int err;
|
|
|
|
if (!ipv6_addr_any(&cfg->gw.ipv6))
|
|
fib6_cfg.fc_flags |= RTF_GATEWAY;
|
|
|
|
/* sets nh_dev if successful */
|
|
err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
|
|
extack);
|
|
if (err) {
|
|
/* IPv6 is not enabled, don't call fib6_nh_release */
|
|
if (err == -EAFNOSUPPORT)
|
|
goto out;
|
|
ipv6_stub->fib6_nh_release(fib6_nh);
|
|
} else {
|
|
nh->nh_flags = fib6_nh->fib_nh_flags;
|
|
}
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct nh_info *nhi;
|
|
struct nexthop *nh;
|
|
int err = 0;
|
|
|
|
nh = nexthop_alloc();
|
|
if (!nh)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
|
|
if (!nhi) {
|
|
kfree(nh);
|
|
return ERR_PTR(-ENOMEM);
|
|
}
|
|
|
|
nh->nh_flags = cfg->nh_flags;
|
|
nh->net = net;
|
|
|
|
nhi->nh_parent = nh;
|
|
nhi->family = cfg->nh_family;
|
|
nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
|
|
|
|
if (cfg->nh_fdb)
|
|
nhi->fdb_nh = 1;
|
|
|
|
if (cfg->nh_blackhole) {
|
|
nhi->reject_nh = 1;
|
|
cfg->nh_ifindex = net->loopback_dev->ifindex;
|
|
}
|
|
|
|
switch (cfg->nh_family) {
|
|
case AF_INET:
|
|
err = nh_create_ipv4(net, nh, nhi, cfg, extack);
|
|
break;
|
|
case AF_INET6:
|
|
err = nh_create_ipv6(net, nh, nhi, cfg, extack);
|
|
break;
|
|
}
|
|
|
|
if (err) {
|
|
kfree(nhi);
|
|
kfree(nh);
|
|
return ERR_PTR(err);
|
|
}
|
|
|
|
/* add the entry to the device based hash */
|
|
if (!nhi->fdb_nh)
|
|
nexthop_devhash_add(net, nhi);
|
|
|
|
rcu_assign_pointer(nh->nh_info, nhi);
|
|
|
|
return nh;
|
|
}
|
|
|
|
/* called with rtnl lock held */
|
|
static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct nexthop *nh;
|
|
int err;
|
|
|
|
if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
|
|
NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
if (!cfg->nh_id) {
|
|
cfg->nh_id = nh_find_unused_id(net);
|
|
if (!cfg->nh_id) {
|
|
NL_SET_ERR_MSG(extack, "No unused id");
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
}
|
|
|
|
if (cfg->nh_grp)
|
|
nh = nexthop_create_group(net, cfg);
|
|
else
|
|
nh = nexthop_create(net, cfg, extack);
|
|
|
|
if (IS_ERR(nh))
|
|
return nh;
|
|
|
|
refcount_set(&nh->refcnt, 1);
|
|
nh->id = cfg->nh_id;
|
|
nh->protocol = cfg->nh_protocol;
|
|
nh->net = net;
|
|
|
|
err = insert_nexthop(net, nh, cfg, extack);
|
|
if (err) {
|
|
__remove_nexthop(net, nh, NULL);
|
|
nexthop_put(nh);
|
|
nh = ERR_PTR(err);
|
|
}
|
|
|
|
return nh;
|
|
}
|
|
|
|
static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
|
|
struct nlmsghdr *nlh, struct nh_config *cfg,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct nhmsg *nhm = nlmsg_data(nlh);
|
|
struct nlattr *tb[NHA_MAX + 1];
|
|
int err;
|
|
|
|
err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
|
|
extack);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
err = -EINVAL;
|
|
if (nhm->resvd || nhm->nh_scope) {
|
|
NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
|
|
goto out;
|
|
}
|
|
if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
|
|
NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
|
|
goto out;
|
|
}
|
|
|
|
switch (nhm->nh_family) {
|
|
case AF_INET:
|
|
case AF_INET6:
|
|
break;
|
|
case AF_UNSPEC:
|
|
if (tb[NHA_GROUP])
|
|
break;
|
|
fallthrough;
|
|
default:
|
|
NL_SET_ERR_MSG(extack, "Invalid address family");
|
|
goto out;
|
|
}
|
|
|
|
if (tb[NHA_GROUPS] || tb[NHA_MASTER]) {
|
|
NL_SET_ERR_MSG(extack, "Invalid attributes in request");
|
|
goto out;
|
|
}
|
|
|
|
memset(cfg, 0, sizeof(*cfg));
|
|
cfg->nlflags = nlh->nlmsg_flags;
|
|
cfg->nlinfo.portid = NETLINK_CB(skb).portid;
|
|
cfg->nlinfo.nlh = nlh;
|
|
cfg->nlinfo.nl_net = net;
|
|
|
|
cfg->nh_family = nhm->nh_family;
|
|
cfg->nh_protocol = nhm->nh_protocol;
|
|
cfg->nh_flags = nhm->nh_flags;
|
|
|
|
if (tb[NHA_ID])
|
|
cfg->nh_id = nla_get_u32(tb[NHA_ID]);
|
|
|
|
if (tb[NHA_FDB]) {
|
|
if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
|
|
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
|
|
NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
|
|
goto out;
|
|
}
|
|
if (nhm->nh_flags) {
|
|
NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
|
|
goto out;
|
|
}
|
|
cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
|
|
}
|
|
|
|
if (tb[NHA_GROUP]) {
|
|
if (nhm->nh_family != AF_UNSPEC) {
|
|
NL_SET_ERR_MSG(extack, "Invalid family for group");
|
|
goto out;
|
|
}
|
|
cfg->nh_grp = tb[NHA_GROUP];
|
|
|
|
cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
|
|
if (tb[NHA_GROUP_TYPE])
|
|
cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
|
|
|
|
if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
|
|
NL_SET_ERR_MSG(extack, "Invalid group type");
|
|
goto out;
|
|
}
|
|
err = nh_check_attr_group(net, tb, extack);
|
|
|
|
/* no other attributes should be set */
|
|
goto out;
|
|
}
|
|
|
|
if (tb[NHA_BLACKHOLE]) {
|
|
if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
|
|
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
|
|
NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
|
|
goto out;
|
|
}
|
|
|
|
cfg->nh_blackhole = 1;
|
|
err = 0;
|
|
goto out;
|
|
}
|
|
|
|
if (!cfg->nh_fdb && !tb[NHA_OIF]) {
|
|
NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
|
|
goto out;
|
|
}
|
|
|
|
if (!cfg->nh_fdb && tb[NHA_OIF]) {
|
|
cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
|
|
if (cfg->nh_ifindex)
|
|
cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
|
|
|
|
if (!cfg->dev) {
|
|
NL_SET_ERR_MSG(extack, "Invalid device index");
|
|
goto out;
|
|
} else if (!(cfg->dev->flags & IFF_UP)) {
|
|
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
|
|
err = -ENETDOWN;
|
|
goto out;
|
|
} else if (!netif_carrier_ok(cfg->dev)) {
|
|
NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
|
|
err = -ENETDOWN;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
err = -EINVAL;
|
|
if (tb[NHA_GATEWAY]) {
|
|
struct nlattr *gwa = tb[NHA_GATEWAY];
|
|
|
|
switch (cfg->nh_family) {
|
|
case AF_INET:
|
|
if (nla_len(gwa) != sizeof(u32)) {
|
|
NL_SET_ERR_MSG(extack, "Invalid gateway");
|
|
goto out;
|
|
}
|
|
cfg->gw.ipv4 = nla_get_be32(gwa);
|
|
break;
|
|
case AF_INET6:
|
|
if (nla_len(gwa) != sizeof(struct in6_addr)) {
|
|
NL_SET_ERR_MSG(extack, "Invalid gateway");
|
|
goto out;
|
|
}
|
|
cfg->gw.ipv6 = nla_get_in6_addr(gwa);
|
|
break;
|
|
default:
|
|
NL_SET_ERR_MSG(extack,
|
|
"Unknown address family for gateway");
|
|
goto out;
|
|
}
|
|
} else {
|
|
/* device only nexthop (no gateway) */
|
|
if (cfg->nh_flags & RTNH_F_ONLINK) {
|
|
NL_SET_ERR_MSG(extack,
|
|
"ONLINK flag can not be set for nexthop without a gateway");
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if (tb[NHA_ENCAP]) {
|
|
cfg->nh_encap = tb[NHA_ENCAP];
|
|
|
|
if (!tb[NHA_ENCAP_TYPE]) {
|
|
NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
|
|
goto out;
|
|
}
|
|
|
|
cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
|
|
err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
|
|
if (err < 0)
|
|
goto out;
|
|
|
|
} else if (tb[NHA_ENCAP_TYPE]) {
|
|
NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
|
|
goto out;
|
|
}
|
|
|
|
|
|
err = 0;
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
/* rtnl */
|
|
static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct net *net = sock_net(skb->sk);
|
|
struct nh_config cfg;
|
|
struct nexthop *nh;
|
|
int err;
|
|
|
|
err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
|
|
if (!err) {
|
|
nh = nexthop_add(net, &cfg, extack);
|
|
if (IS_ERR(nh))
|
|
err = PTR_ERR(nh);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Validate a get/delete request and extract the nexthop id into @id.
 *
 * The request may carry no attribute other than NHA_ID, the protocol,
 * reserved, scope and flags header fields have to be zero, and the id has
 * to be present and non-zero.  Note that @id is written as soon as NHA_ID
 * is found, even if its value (0) is then rejected.
 *
 * Returns 0 on success, the nlmsg_parse() error, or -EINVAL.
 */
static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id,
				struct netlink_ext_ack *extack)
{
	struct nhmsg *hdr = nlmsg_data(nlh);
	struct nlattr *tb[NHA_MAX + 1];
	int err, type;

	err = nlmsg_parse(nlh, sizeof(*hdr), tb, NHA_MAX, rtm_nh_policy,
			  extack);
	if (err < 0)
		return err;

	for (type = 0; type < __NHA_MAX; ++type) {
		if (!tb[type] || type == NHA_ID)
			continue;

		NL_SET_ERR_MSG_ATTR(extack, tb[type],
				    "Unexpected attribute in request");
		return -EINVAL;
	}

	if (hdr->nh_protocol || hdr->resvd || hdr->nh_scope || hdr->nh_flags) {
		NL_SET_ERR_MSG(extack, "Invalid values in header");
		return -EINVAL;
	}

	if (!tb[NHA_ID]) {
		NL_SET_ERR_MSG(extack, "Nexthop id is missing");
		return -EINVAL;
	}

	*id = nla_get_u32(tb[NHA_ID]);
	if (*id == 0) {
		NL_SET_ERR_MSG(extack, "Invalid nexthop id");
		return -EINVAL;
	}

	return 0;
}
|
|
|
|
/* rtnl */
|
|
static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct net *net = sock_net(skb->sk);
|
|
struct nl_info nlinfo = {
|
|
.nlh = nlh,
|
|
.nl_net = net,
|
|
.portid = NETLINK_CB(skb).portid,
|
|
};
|
|
struct nexthop *nh;
|
|
int err;
|
|
u32 id;
|
|
|
|
err = nh_valid_get_del_req(nlh, &id, extack);
|
|
if (err)
|
|
return err;
|
|
|
|
nh = nexthop_find_by_id(net, id);
|
|
if (!nh)
|
|
return -ENOENT;
|
|
|
|
remove_nexthop(net, nh, &nlinfo);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* rtnl */
|
|
static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct net *net = sock_net(in_skb->sk);
|
|
struct sk_buff *skb = NULL;
|
|
struct nexthop *nh;
|
|
int err;
|
|
u32 id;
|
|
|
|
err = nh_valid_get_del_req(nlh, &id, extack);
|
|
if (err)
|
|
return err;
|
|
|
|
err = -ENOBUFS;
|
|
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
|
|
if (!skb)
|
|
goto out;
|
|
|
|
err = -ENOENT;
|
|
nh = nexthop_find_by_id(net, id);
|
|
if (!nh)
|
|
goto errout_free;
|
|
|
|
err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
|
|
nlh->nlmsg_seq, 0);
|
|
if (err < 0) {
|
|
WARN_ON(err == -EMSGSIZE);
|
|
goto errout_free;
|
|
}
|
|
|
|
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
|
|
out:
|
|
return err;
|
|
errout_free:
|
|
kfree_skb(skb);
|
|
goto out;
|
|
}
|
|
|
|
/*
 * Decide whether a nexthop must be left out of a dump.
 *
 * @nh:           nexthop under consideration
 * @dev_idx:      if non-zero, keep only nexthops whose device has this ifindex
 * @master_idx:   if non-zero, keep only nexthops whose device has a master
 *                upper device with this ifindex
 * @group_filter: if true, keep only nexthop groups
 * @family:       if non-zero, keep only nexthops of this address family
 *
 * Groups never match a device, master or family filter.
 *
 * Returns true when the nexthop is filtered out, false when it is kept.
 */
static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
			     bool group_filter, u8 family)
{
	const struct net_device *nh_dev;
	struct net_device *upper;
	const struct nh_info *info;

	if (group_filter && !nh->is_group)
		return true;

	/* none of the per-nexthop filters is set: nothing more to check */
	if (!(dev_idx || master_idx || family))
		return false;

	if (nh->is_group)
		return true;

	info = rtnl_dereference(nh->nh_info);
	if (family && info->family != family)
		return true;

	nh_dev = info->fib_nhc.nhc_dev;
	if (dev_idx && !(nh_dev && nh_dev->ifindex == dev_idx))
		return true;

	if (!master_idx)
		return false;

	if (!nh_dev)
		return true;

	upper = netdev_master_upper_dev_get((struct net_device *)nh_dev);

	return !upper || upper->ifindex != master_idx;
}
|
|
|
|
/*
 * Validate a nexthop dump request and extract its filters.
 *
 * @nlh:          dump request message
 * @dev_idx:      set from NHA_OIF when present
 * @master_idx:   set from NHA_MASTER when present
 * @group_filter: set to true when NHA_GROUPS is present
 * @fdb_filter:   set to true when NHA_FDB is present
 * @cb:           dump callback; its extack receives error messages
 *
 * Output parameters are only written for attributes that are present, so
 * the caller must initialise them to the "no filter" value beforehand.
 *
 * Returns 0 on success, the nlmsg_parse() error if parsing fails, or
 * -EINVAL for an out-of-range index, an unsupported attribute, or non-zero
 * nh_protocol, resvd, nh_scope or nh_flags header fields.
 */
static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
			     int *master_idx, bool *group_filter,
			     bool *fdb_filter, struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[NHA_MAX + 1];
	struct nhmsg *nhm;
	int err, i;
	u32 idx;

	/*
	 * Hand extack to the parser so that parse and policy failures are
	 * reported to the requester, as nh_valid_get_del_req() does.
	 */
	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
			  extack);
	if (err < 0)
		return err;

	for (i = 0; i <= NHA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NHA_OIF:
			idx = nla_get_u32(tb[i]);
			/* the index is stored in an int, so reject larger values */
			if (idx > INT_MAX) {
				NL_SET_ERR_MSG(extack, "Invalid device index");
				return -EINVAL;
			}
			*dev_idx = idx;
			break;
		case NHA_MASTER:
			idx = nla_get_u32(tb[i]);
			if (idx > INT_MAX) {
				NL_SET_ERR_MSG(extack, "Invalid master device index");
				return -EINVAL;
			}
			*master_idx = idx;
			break;
		case NHA_GROUPS:
			*group_filter = true;
			break;
		case NHA_FDB:
			*fdb_filter = true;
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	nhm = nlmsg_data(nlh);
	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
		return -EINVAL;
	}

	return 0;
}
|
|
|
|
/* rtnl */
|
|
static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
|
|
{
|
|
bool group_filter = false, fdb_filter = false;
|
|
struct nhmsg *nhm = nlmsg_data(cb->nlh);
|
|
int dev_filter_idx = 0, master_idx = 0;
|
|
struct net *net = sock_net(skb->sk);
|
|
struct rb_root *root = &net->nexthop.rb_root;
|
|
struct rb_node *node;
|
|
int idx = 0, s_idx;
|
|
int err;
|
|
|
|
err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
|
|
&group_filter, &fdb_filter, cb);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
s_idx = cb->args[0];
|
|
for (node = rb_first(root); node; node = rb_next(node)) {
|
|
struct nexthop *nh;
|
|
|
|
if (idx < s_idx)
|
|
goto cont;
|
|
|
|
nh = rb_entry(node, struct nexthop, rb_node);
|
|
if (nh_dump_filtered(nh, dev_filter_idx, master_idx,
|
|
group_filter, nhm->nh_family))
|
|
goto cont;
|
|
|
|
err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
|
|
NETLINK_CB(cb->skb).portid,
|
|
cb->nlh->nlmsg_seq, NLM_F_MULTI);
|
|
if (err < 0) {
|
|
if (likely(skb->len))
|
|
goto out;
|
|
|
|
goto out_err;
|
|
}
|
|
cont:
|
|
idx++;
|
|
}
|
|
|
|
out:
|
|
err = skb->len;
|
|
out_err:
|
|
cb->args[0] = idx;
|
|
cb->seq = net->nexthop.seq;
|
|
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
|
|
|
|
return err;
|
|
}
|
|
|
|
/*
 * Propagate an MTU change on @dev to the nexthops that use it.
 *
 * Scans the devhash bucket selected by the device's ifindex and calls
 * fib_nhc_update_mtu() with the device's current MTU and @orig_mtu for
 * every AF_INET entry bound to @dev.  Entries of other families, or for
 * other devices in the same bucket, are left untouched.
 */
static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
	unsigned int slot = nh_dev_hashfn(dev->ifindex);
	struct net *net = dev_net(dev);
	struct hlist_head *bucket = &net->nexthop.devhash[slot];
	struct hlist_node *tmp;
	struct nh_info *nhi;

	hlist_for_each_entry_safe(nhi, tmp, bucket, dev_hash) {
		if (nhi->fib_nhc.nhc_dev != dev || nhi->family != AF_INET)
			continue;

		fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu, orig_mtu);
	}
}
|
|
|
|
/* rtnl */
|
|
static int nh_netdev_event(struct notifier_block *this,
|
|
unsigned long event, void *ptr)
|
|
{
|
|
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
|
struct netdev_notifier_info_ext *info_ext;
|
|
|
|
switch (event) {
|
|
case NETDEV_DOWN:
|
|
case NETDEV_UNREGISTER:
|
|
nexthop_flush_dev(dev, event);
|
|
break;
|
|
case NETDEV_CHANGE:
|
|
if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
|
|
nexthop_flush_dev(dev, event);
|
|
break;
|
|
case NETDEV_CHANGEMTU:
|
|
info_ext = ptr;
|
|
nexthop_sync_mtu(dev, info_ext->ext.mtu);
|
|
rt_cache_flush(dev_net(dev));
|
|
break;
|
|
}
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
/*
 * Notifier block that routes netdevice events to nh_netdev_event().
 * It is passed to register_netdevice_notifier() from nexthop_init().
 */
static struct notifier_block nh_netdev_notifier = {
|
|
.notifier_call = nh_netdev_event,
|
|
};
|
|
|
|
int register_nexthop_notifier(struct net *net, struct notifier_block *nb)
|
|
{
|
|
return blocking_notifier_chain_register(&net->nexthop.notifier_chain,
|
|
nb);
|
|
}
|
|
EXPORT_SYMBOL(register_nexthop_notifier);
|
|
|
|
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
|
|
{
|
|
return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
|
|
nb);
|
|
}
|
|
EXPORT_SYMBOL(unregister_nexthop_notifier);
|
|
|
|
/*
 * Per-namespace exit hook (the .exit member of nexthop_net_ops).
 *
 * Flushes all nexthops of @net while holding the rtnl lock, then frees
 * the devhash table that nexthop_net_init() allocated.
 */
static void __net_exit nexthop_net_exit(struct net *net)
|
|
{
|
|
rtnl_lock();
|
|
flush_all_nexthops(net);
|
|
rtnl_unlock();
|
|
/* the table is released only after the flush has completed */
kfree(net->nexthop.devhash);
|
|
}
|
|
|
|
/*
 * Per-namespace init hook (the .init member of nexthop_net_ops).
 *
 * Resets the nexthop rb-tree root, allocates a zeroed devhash table of
 * NH_DEV_HASHSIZE list heads and initialises the notifier chain.
 *
 * Returns 0 on success or -ENOMEM if the table cannot be allocated; in
 * that case the notifier chain is not initialised.
 */
static int __net_init nexthop_net_init(struct net *net)
{
	struct hlist_head *table;

	net->nexthop.rb_root = RB_ROOT;

	table = kzalloc(sizeof(*table) * NH_DEV_HASHSIZE, GFP_KERNEL);
	/* stored unconditionally, so devhash is NULL after a failed allocation */
	net->nexthop.devhash = table;
	if (!table)
		return -ENOMEM;

	BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);

	return 0;
}
|
|
|
|
static struct pernet_operations nexthop_net_ops = {
|
|
.init = nexthop_net_init,
|
|
.exit = nexthop_net_exit,
|
|
};
|
|
|
|
/*
 * Subsystem initcall for the nexthop code.
 *
 * Registers the per-namespace operations, the netdevice notifier and the
 * rtnetlink handlers:
 *  - PF_UNSPEC: RTM_NEWNEXTHOP, RTM_DELNEXTHOP and RTM_GETNEXTHOP (doit
 *    and dumpit);
 *  - PF_INET and PF_INET6: RTM_NEWNEXTHOP, and RTM_GETNEXTHOP dumpit only.
 *
 * The return values of the registration calls are not checked.
 * Always returns 0.
 */
static int __init nexthop_init(void)
{
	const int families[] = { PF_INET, PF_INET6 };
	unsigned int i;

	register_pernet_subsys(&nexthop_net_ops);

	register_netdevice_notifier(&nh_netdev_notifier);

	rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
		      rtm_dump_nexthop, 0);

	/* the family-specific handlers are registered in the same order for each family */
	for (i = 0; i < sizeof(families) / sizeof(families[0]); i++) {
		rtnl_register(families[i], RTM_NEWNEXTHOP, rtm_new_nexthop,
			      NULL, 0);
		rtnl_register(families[i], RTM_GETNEXTHOP, NULL,
			      rtm_dump_nexthop, 0);
	}

	return 0;
}
subsys_initcall(nexthop_init);
|