diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst index f5d2f2414de8b62fc33aef9550b0db067d924421..22cc7a040dae053ec6da2f44436524b60fdf5a60 100644 --- a/Documentation/arch/x86/boot.rst +++ b/Documentation/arch/x86/boot.rst @@ -77,7 +77,7 @@ Protocol 2.14 BURNT BY INCORRECT COMMIT Protocol 2.15 (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max. ============= ============================================================ -.. note:: + .. note:: The protocol version number should be changed only if the setup header is changed. There is no need to update the version number if boot_params or kernel_info are changed. Additionally, it is recommended to use diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 9f6a5ccbcefef45e62d4447bfadde3052f27b21e..d14d123ad7a028872d5f04de0644685dcba684e3 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -275,12 +275,12 @@ allOf: properties: rx-internal-delay-ps: description: - RGMII Receive Clock Delay defined in pico seconds.This is used for + RGMII Receive Clock Delay defined in pico seconds. This is used for controllers that have configurable RX internal delays. If this property is present then the MAC applies the RX delay. tx-internal-delay-ps: description: - RGMII Transmit Clock Delay defined in pico seconds.This is used for + RGMII Transmit Clock Delay defined in pico seconds. This is used for controllers that have configurable TX internal delays. If this property is present then the MAC applies the TX delay. diff --git a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml index 462ead5a1cec3298bdd5725d4bf2e8f151920b36..2cf3d016db42c11d6b9dfcf18d9a1e0cfb59c079 100644 --- a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml +++ b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml @@ -36,6 +36,7 @@ properties: - qcom,sm8350-ufshc - qcom,sm8450-ufshc - qcom,sm8550-ufshc + - qcom,sm8650-ufshc - const: qcom,ufshc - const: jedec,ufs-2.0 @@ -122,6 +123,7 @@ allOf: - qcom,sm8350-ufshc - qcom,sm8450-ufshc - qcom,sm8550-ufshc + - qcom,sm8650-ufshc then: properties: clocks: diff --git a/MAINTAINERS b/MAINTAINERS index 97f51d5ec1cfd715487a616c78afd40324082dfc..ea790149af795155003eaacca87289e05d55454b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8950,7 +8950,6 @@ S: Maintained F: scripts/get_maintainer.pl GFS2 FILE SYSTEM -M: Bob Peterson <rpeterso@redhat.com> M: Andreas Gruenbacher <agruenba@redhat.com> L: gfs2@lists.linux.dev S: Supported @@ -21769,7 +21768,9 @@ F: Documentation/devicetree/bindings/counter/ti-eqep.yaml F: drivers/counter/ti-eqep.c TI ETHERNET SWITCH DRIVER (CPSW) -R: Grygorii Strashko <grygorii.strashko@ti.com> +R: Siddharth Vadapalli <s-vadapalli@ti.com> +R: Ravi Gunasekaran <r-gunasekaran@ti.com> +R: Roger Quadros <rogerq@kernel.org> L: linux-omap@vger.kernel.org L: netdev@vger.kernel.org S: Maintained @@ -21793,6 +21794,15 @@ F: Documentation/devicetree/bindings/media/i2c/ti,ds90* F: drivers/media/i2c/ds90* F: include/media/i2c/ds90* +TI ICSSG ETHERNET DRIVER (ICSSG) +R: MD Danish Anwar <danishanwar@ti.com> +R: Roger Quadros <rogerq@kernel.org> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/ti,icss*.yaml +F: drivers/net/ethernet/ti/icssg/* + TI J721E CSI2RX DRIVER M: Jai Luthra <j-luthra@ti.com> L: linux-media@vger.kernel.org @@ -23692,6 +23702,20 @@ F: arch/x86/kernel/dumpstack.c F: arch/x86/kernel/stacktrace.c F: arch/x86/kernel/unwind_*.c +X86 TRUST DOMAIN EXTENSIONS (TDX) +M: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> +R: Dave Hansen <dave.hansen@linux.intel.com> +L: x86@kernel.org +L: linux-coco@lists.linux.dev +S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/tdx +F: arch/x86/boot/compressed/tdx* +F: arch/x86/coco/tdx/ +F: arch/x86/include/asm/shared/tdx.h +F: arch/x86/include/asm/tdx.h +F: arch/x86/virt/vmx/tdx/ +F: drivers/virt/coco/tdx-guest + X86 VDSO M: Andy Lutomirski <luto@kernel.org> L: linux-kernel@vger.kernel.org @@ -23872,8 +23896,7 @@ T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git P: Documentation/filesystems/xfs-maintainer-entry-profile.rst F: Documentation/ABI/testing/sysfs-fs-xfs F: Documentation/admin-guide/xfs.rst -F: Documentation/filesystems/xfs-delayed-logging-design.rst -F: Documentation/filesystems/xfs-self-describing-metadata.rst +F: Documentation/filesystems/xfs-* F: fs/xfs/ F: include/uapi/linux/dqblk_xfs.h F: include/uapi/linux/fsmap.h diff --git a/Makefile b/Makefile index ede0bd24105602eaf7fac59ff7690206ccc1b4a1..724c79bebe7275be0e24469340e43cb91a90e898 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index fd69dfa0cdabbe7aa6bd31241e8c11fad26a9592..a7c9c0e69e5ab2a924dc9cf6b8be2569de07522f 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -140,11 +140,11 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN default 8 config ARCH_MMAP_RND_BITS_MAX - default 24 if 64BIT - default 17 + default 18 if 64BIT + default 13 config ARCH_MMAP_RND_COMPAT_BITS_MAX - default 17 + default 13 # unless you want to implement ACPI on PA-RISC ... ;-) config PM diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 140eaa97bf215dcde034591222b11098f3160d0a..2d73d3c3cd37f8a61b1a9019b8d7821065010db1 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -349,15 +349,7 @@ struct pt_regs; /* forward declaration... */ #define ELF_HWCAP 0 -/* Masks for stack and mmap randomization */ -#define BRK_RND_MASK (is_32bit_task() ? 0x07ffUL : 0x3ffffUL) -#define MMAP_RND_MASK (is_32bit_task() ? 0x1fffUL : 0x3ffffUL) -#define STACK_RND_MASK MMAP_RND_MASK - -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *); -#define arch_randomize_brk arch_randomize_brk - +#define STACK_RND_MASK 0x7ff /* 8MB of VA */ #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index c05d121cf5d0f25cc581bd7027acc6d618ca5a9f..982aca20f56f5356927363da73600064657c5e39 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -47,6 +47,8 @@ #ifndef __ASSEMBLY__ +struct rlimit; +unsigned long mmap_upper_limit(struct rlimit *rlim_stack); unsigned long calc_max_stack_size(unsigned long stack_max); /* diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 29e2750f86a410f58da62d30ccc4e25f2e901908..e95a977ba5f376eb813d4c7806d205a92f539880 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -383,7 +383,7 @@ show_cpuinfo (struct seq_file *m, void *v) char cpu_name[60], *p; /* strip PA path from CPU name to not confuse lscpu */ - strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name)); + strscpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name)); p = strrchr(cpu_name, '['); if (p) *(--p) = 0; diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ab896eff7a1de9515407031c76d4afb1552fe355..98af719d5f85b2b24a4c7af70415e97e90c73803 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -77,7 +77,7 @@ unsigned long calc_max_stack_size(unsigned long stack_max) * indicating that "current" should be used instead of a passed-in * value from the exec bprm as done with arch_pick_mmap_layout(). */ -static unsigned long mmap_upper_limit(struct rlimit *rlim_stack) +unsigned long mmap_upper_limit(struct rlimit *rlim_stack) { unsigned long stack_base; diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index c8a7fc23f63c672456721888db9e293fbf70dafb..f896eed4516c7e3c131751ab5865aa70cfa2d645 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -16,6 +16,9 @@ #include <asm/x86_init.h> #include <asm/cpufeature.h> #include <asm/irq_vectors.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -127,6 +130,17 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap) if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_option_idle_override == IDLE_NOMWAIT) *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH); + + if (xen_initial_domain()) { + /* + * When Linux is running as Xen dom0, the hypervisor is the + * entity in charge of the processor power management, and so + * Xen needs to check the OS capabilities reported in the + * processor capabilities buffer matches what the hypervisor + * driver supports. + */ + xen_sanitize_proc_cap_bits(cap); + } } static inline bool acpi_has_cpu_in_madt(void) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 7048dfacc04b2413acba1054fb4412c7048b8249..a9088250770f2a3d6320040f815f1b9e6cba653b 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -100,4 +100,13 @@ static inline void leave_lazy(enum xen_lazy_mode mode) enum xen_lazy_mode xen_get_lazy_mode(void); +#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI) +void xen_sanitize_proc_cap_bits(uint32_t *buf); +#else +static inline void xen_sanitize_proc_cap_bits(uint32_t *buf) +{ + BUG(); +} +#endif + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d0918a75cb00a02bbc01804c16970fd7b91ab3d2..1a0dd80d81ac301a0fbc9e0d86e14866004ad18a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -63,6 +63,7 @@ int acpi_fix_pin2_polarity __initdata; #ifdef CONFIG_X86_LOCAL_APIC static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; +static bool has_lapic_cpus __initdata; static bool acpi_support_online_capable; #endif @@ -232,6 +233,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) if (!acpi_is_processor_usable(processor->lapic_flags)) return 0; + /* + * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure + * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID + * in x2APIC must be equal or greater than 0xff. + */ + if (has_lapic_cpus && apic_id < 0xff) + return 0; + /* * We need to register disabled CPU as well to permit * counting disabled CPUs. This allows us to size @@ -1114,10 +1123,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) static int __init acpi_parse_madt_lapic_entries(void) { - int count; - int x2count = 0; - int ret; - struct acpi_subtable_proc madt_proc[2]; + int count, x2count = 0; if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; @@ -1126,21 +1132,11 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_parse_sapic, MAX_LOCAL_APIC); if (!count) { - memset(madt_proc, 0, sizeof(madt_proc)); - madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; - madt_proc[0].handler = acpi_parse_lapic; - madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; - madt_proc[1].handler = acpi_parse_x2apic; - ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, - sizeof(struct acpi_table_madt), - madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); - if (ret < 0) { - pr_err("Error parsing LAPIC/X2APIC entries\n"); - return ret; - } - - count = madt_proc[0].count; - x2count = madt_proc[1].count; + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, + acpi_parse_lapic, MAX_LOCAL_APIC); + has_lapic_cpus = count > 0; + x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, + acpi_parse_x2apic, MAX_LOCAL_APIC); } if (!count && !x2count) { pr_err("No LAPIC entries present\n"); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index cacf2ede62175d40dabf24d5606fcef9d363cc1e..23d8aaf8d9fd1950fcd6c3a43f5af4a013d57a31 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp); uc_flags = frame_uc_flags(regs); - if (setup_signal_shadow_stack(ksig)) - return -EFAULT; - if (!user_access_begin(frame, sizeof(*frame))) return -EFAULT; @@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) return -EFAULT; } + if (setup_signal_shadow_stack(ksig)) + return -EFAULT; + /* Set up registers for signal handler */ regs->di = ksig->sig; /* In case the signal handler was declared without prototypes */ diff --git a/block/blk-mq.c b/block/blk-mq.c index e2d11183f62e374f0f83fa56a4e642e50092cd0e..900c1be1fee188b03857a7e12b6fe6978f04a021 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, }; struct request *rq; - if (unlikely(bio_queue_enter(bio))) - return NULL; - if (blk_mq_attempt_bio_merge(q, bio, nsegs)) - goto queue_exit; + return NULL; rq_qos_throttle(q, bio); @@ -2878,35 +2875,23 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, rq_qos_cleanup(q, bio); if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); -queue_exit: - blk_queue_exit(q); return NULL; } -static inline struct request *blk_mq_get_cached_request(struct request_queue *q, - struct blk_plug *plug, struct bio **bio, unsigned int nsegs) +/* return true if this @rq can be used for @bio */ +static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug, + struct bio *bio) { - struct request *rq; - enum hctx_type type, hctx_type; + enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf); + enum hctx_type hctx_type = rq->mq_hctx->type; - if (!plug) - return NULL; - rq = rq_list_peek(&plug->cached_rq); - if (!rq || rq->q != q) - return NULL; + WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq); - if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) { - *bio = NULL; - return NULL; - } - - type = blk_mq_get_hctx_type((*bio)->bi_opf); - hctx_type = rq->mq_hctx->type; if (type != hctx_type && !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT)) - return NULL; - if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf)) - return NULL; + return false; + if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf)) + return false; /* * If any qos ->throttle() end up blocking, we will have flushed the @@ -2914,12 +2899,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, * before we throttle. */ plug->cached_rq = rq_list_next(rq); - rq_qos_throttle(q, *bio); + rq_qos_throttle(rq->q, bio); blk_mq_rq_time_init(rq, 0); - rq->cmd_flags = (*bio)->bi_opf; + rq->cmd_flags = bio->bi_opf; INIT_LIST_HEAD(&rq->queuelist); - return rq; + return true; } static void bio_set_ioprio(struct bio *bio) @@ -2949,7 +2934,7 @@ void blk_mq_submit_bio(struct bio *bio) struct blk_plug *plug = blk_mq_plug(bio); const int is_sync = op_is_sync(bio->bi_opf); struct blk_mq_hw_ctx *hctx; - struct request *rq; + struct request *rq = NULL; unsigned int nr_segs = 1; blk_status_t ret; @@ -2960,20 +2945,36 @@ void blk_mq_submit_bio(struct bio *bio) return; } - if (!bio_integrity_prep(bio)) - return; - bio_set_ioprio(bio); - rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs); - if (!rq) { - if (!bio) + if (plug) { + rq = rq_list_peek(&plug->cached_rq); + if (rq && rq->q != q) + rq = NULL; + } + if (rq) { + if (!bio_integrity_prep(bio)) return; - rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); - if (unlikely(!rq)) + if (blk_mq_attempt_bio_merge(q, bio, nr_segs)) return; + if (blk_mq_can_use_cached_rq(rq, plug, bio)) + goto done; + percpu_ref_get(&q->q_usage_counter); + } else { + if (unlikely(bio_queue_enter(bio))) + return; + if (!bio_integrity_prep(bio)) + goto fail; + } + + rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); + if (unlikely(!rq)) { +fail: + blk_queue_exit(q); + return; } +done: trace_block_getrq(bio); rq_qos_track(q, rq, bio); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 0ace218783c813142f09ea2f90b3c06d324b16bd..e9b16cbc26f49eb79bb1f336c3903c8f5f86b2c4 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -250,9 +250,6 @@ int ivpu_rpm_get_if_active(struct ivpu_device *vdev) { int ret; - ivpu_dbg(vdev, RPM, "rpm_get_if_active count %d\n", - atomic_read(&vdev->drm.dev->power.usage_count)); - ret = pm_runtime_get_if_active(vdev->drm.dev, false); drm_WARN_ON(&vdev->drm, ret < 0); diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 74d00b0c83fea7c1727b8b94f9a621de40b16022..4a98a859d44d34c9b27b81de7a90cdc779104e07 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -131,7 +131,7 @@ config RASPBERRYPI_FIRMWARE config FW_CFG_SYSFS tristate "QEMU fw_cfg device support in sysfs" - depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || SPARC || X86) + depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || RISCV || SPARC || X86) depends on HAS_IOPORT_MAP default n help diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index a69399a6b7c0052fc5f66948e7928bbc9969ef3f..1448f61173b357f90802c905f6c5076895491626 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -211,7 +211,7 @@ static void fw_cfg_io_cleanup(void) /* arch-specific ctrl & data register offsets are not available in ACPI, DT */ #if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF)) -# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64)) +# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV)) # define FW_CFG_CTRL_OFF 0x08 # define FW_CFG_DATA_OFF 0x00 # define FW_CFG_DMA_OFF 0x10 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index afec09930efa953c88f34f94dc6818c3c6f2c448..9d92ca1576771bc236f73f507df7c1de473cef8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -248,6 +248,7 @@ extern int amdgpu_umsch_mm; extern int amdgpu_seamless; extern int amdgpu_user_partt_mode; +extern int amdgpu_agp; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index df3ecfa9e13f5d87d3e67397e22d0d339c62a809..e50be65000303ac7ff71130fc0363dc7f1c03d29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -207,7 +207,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } for (i = 0; i < p->nchunks; i++) { - struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL; + struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 3095a3a864af713c57ebcee2b192dbc99866e7fe..8f24cabe21554688126ad061dc7d6f288c1e4fad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -207,6 +207,7 @@ int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE; int amdgpu_umsch_mm; int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; +int amdgpu_agp = -1; /* auto */ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -961,6 +962,15 @@ module_param_named(seamless, amdgpu_seamless, int, 0444); MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); +/** + * DOC: agp (int) + * Enable the AGP aperture. This provides an aperture in the GPU's internal + * address space for direct access to system memory. Note that these accesses + * are non-snooped, so they are only used for access to uncached memory. + */ +MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(agp, amdgpu_agp, int, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 32b701cc0376d3451f480e50dd0405e142ebedc9..a21045d018f2b6efe6a281c1cec74fd25357c064 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1473,6 +1473,11 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links : node_num_links; } + /* popluate the connected port num info if supported and available */ + if (ta_port_num_support && topology->nodes[i].num_links) { + memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num, + sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM); + } /* reflect the topology information for bi-directionality */ if (requires_reflection && topology->nodes[i].num_hops) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 5d36ad3f48c74ac298ddec9c27cef9494c19979a..c4d9cbde55b9bc58799aa1acc9fe4eea29d1a98a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -150,6 +150,7 @@ struct psp_xgmi_node_info { uint8_t is_sharing_enabled; enum ta_xgmi_assigned_sdma_engine sdma_engine; uint8_t num_links; + struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM]; }; struct psp_xgmi_topology_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 84e5987b14e05ecd2c52c9d93635f4d182a34a5f..a3dc68e989108e52c71a24bd97fff44f6183f8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1188,7 +1188,7 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, } if (block_obj->hw_ops->query_ras_error_count) - block_obj->hw_ops->query_ras_error_count(adev, &err_data); + block_obj->hw_ops->query_ras_error_count(adev, err_data); if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || (info->head.block == AMDGPU_RAS_BLOCK__GFX) || diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 65949cc7abb93243aea94860cf87eef2327e849a..07d930339b0781bd28f2e8d0b308f09617c0eeb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -398,6 +398,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) * amdgpu_uvd_entity_init - init entity * * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer to check * * Initialize the entity used for handle management in the kernel driver. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 0954447f689d9e5477c67a1bf71fb9f53cde063b..59acf424a078f0eb5e0182ed5dc082d86d4421a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -230,6 +230,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) * amdgpu_vce_entity_init - init entity * * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer to check * * Initialize the entity used for handle management in the kernel driver. */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 0ec7b061d7c2035ac21a1a8b9c858de58d126396..a5a05c16c10d7be2ea1b86fbdcf76699551a8fd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -675,7 +675,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 6dce9b29f675631c2049d1f2ef50b5ea64bff7fc..23d7b548d13f446766c0adc6f051f9b492111efb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -640,8 +640,9 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH); - if (!amdgpu_sriov_vf(adev) || - (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0))) + if (!amdgpu_sriov_vf(adev) && + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) && + (amdgpu_agp == 1)) amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index bde25eb4ed8e2cb1e3f0adf62897cfab076db7db..2ac5820e9c9241431b1fda853dddc9242c04790e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1630,7 +1630,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, } else { amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) amdgpu_gmc_agp_location(adev, mc); } /* base offset of vram pages */ @@ -2170,8 +2170,6 @@ static int gmc_v9_0_sw_fini(void *handle) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) amdgpu_gmc_sysfs_fini(adev); - adev->gmc.num_mem_partitions = 0; - kfree(adev->gmc.mem_partitions); amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); @@ -2185,6 +2183,9 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); + adev->gmc.num_mem_partitions = 0; + kfree(adev->gmc.mem_partitions); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index ea142611be1c06ac9f396d40b4252a82f49fb370..9b0146732e13ced30b38336fc76e0d46922ff77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -130,6 +130,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; int i; + if (amdgpu_sriov_vf(adev)) + return; + inst_mask = adev->aid_mask; for_each_inst(i, inst_mask) { /* Program the AGP BAR */ @@ -139,9 +142,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); - if (amdgpu_sriov_vf(adev)) - return; - /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6f99f6754c119efddf9cc97e2db21f57bd9ad8bb..ee97814ebd994721f4cb161b0e3df40693086265 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2079,7 +2079,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) struct dmub_srv_create_params create_params; struct dmub_srv_region_params region_params; struct dmub_srv_region_info region_info; - struct dmub_srv_fb_params fb_params; + struct dmub_srv_memory_params memory_params; struct dmub_srv_fb_info *fb_info; struct dmub_srv *dmub_srv; const struct dmcub_firmware_header_v1_0 *hdr; @@ -2182,6 +2182,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + PSP_HEADER_BYTES; + region_params.is_mailbox_in_inbox = false; status = dmub_srv_calc_region_info(dmub_srv, ®ion_params, ®ion_info); @@ -2205,10 +2206,10 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) return r; /* Rebase the regions on the framebuffer address. */ - memset(&fb_params, 0, sizeof(fb_params)); - fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr; - fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr; - fb_params.region_info = ®ion_info; + memset(&memory_params, 0, sizeof(memory_params)); + memory_params.cpu_fb_addr = adev->dm.dmub_bo_cpu_addr; + memory_params.gpu_fb_addr = adev->dm.dmub_bo_gpu_addr; + memory_params.region_info = ®ion_info; adev->dm.dmub_fb_info = kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL); @@ -2220,7 +2221,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) return -ENOMEM; } - status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info); + status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info); if (status != DMUB_STATUS_OK) { DRM_ERROR("Error calculating DMUB FB info: %d\n", status); return -EINVAL; @@ -7481,6 +7482,9 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, int i; int result = -EIO; + if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported) + return result; + cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL); if (!cmd.payloads) @@ -9603,14 +9607,14 @@ static bool should_reset_plane(struct drm_atomic_state *state, struct drm_plane *other; struct drm_plane_state *old_other_state, *new_other_state; struct drm_crtc_state *new_crtc_state; + struct amdgpu_device *adev = drm_to_adev(plane->dev); int i; /* - * TODO: Remove this hack once the checks below are sufficient - * enough to determine when we need to reset all the planes on - * the stream. + * TODO: Remove this hack for all asics once it proves that the + * fast updates works fine on DCN3.2+. */ - if (state->allow_modeset) + if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset) return true; /* Exit early if we know that we're adding or removing the plane. */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index ed784cf27d396f10fe507a1a4a41a009f01e15a5..c7a29bb737e24d0394e770529f1d3f43d0333aae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -536,11 +536,8 @@ bool dm_helpers_dp_read_dpcd( struct amdgpu_dm_connector *aconnector = link->priv; - if (!aconnector) { - drm_dbg_dp(aconnector->base.dev, - "Failed to find connector for link!\n"); + if (!aconnector) return false; - } return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data, size) == size; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index d3b13d362edacc676c8b70a8a10bfa028728f7f3..11da0eebee6c4b0afd34ee5b7a499e046c4be413 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1604,31 +1604,31 @@ enum dc_status dm_dp_mst_is_port_support_mode( unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; - struct drm_dp_mst_topology_mgr *mst_mgr; + uint16_t full_pbn = aconnector->mst_output_port->full_pbn; /* - * check if the mode could be supported if DSC pass-through is supported - * AND check if there enough bandwidth available to support the mode - * with DSC enabled. + * Consider the case with the depth of the mst topology tree is equal or less than 2 + * A. When dsc bitstream can be transmitted along the entire path + * 1. dsc is possible between source and branch/leaf device (common dsc params is possible), AND + * 2. dsc passthrough supported at MST branch, or + * 3. dsc decoding supported at leaf MST device + * Use maximum dsc compression as bw constraint + * B. When dsc bitstream cannot be transmitted along the entire path + * Use native bw as bw constraint */ if (is_dsc_common_config_possible(stream, &bw_range) && - aconnector->mst_output_port->passthrough_aux) { - mst_mgr = aconnector->mst_output_port->mgr; - mutex_lock(&mst_mgr->lock); - + (aconnector->mst_output_port->passthrough_aux || + aconnector->dsc_aux == &aconnector->mst_output_port->aux)) { cur_link_settings = stream->link->verified_link_cap; upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, - &cur_link_settings - ); - down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn); + &cur_link_settings); + down_link_bw_in_kbps = kbps_from_pbn(full_pbn); /* pick the bottleneck */ end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps); - mutex_unlock(&mst_mgr->lock); - /* * use the maximum dsc compression bandwidth as the required * bandwidth for the mode @@ -1643,8 +1643,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( /* check if mode could be supported within full_pbn */ bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); - - if (pbn > aconnector->mst_output_port->full_pbn) + if (pbn > full_pbn) return DC_FAIL_BANDWIDTH_VALIDATE; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 0fa4fcd00de2c982ebc2634c7a74f19585c3b721..507a7cf56711f3d83d62e64e4c7f417d8b8ab398 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -820,22 +820,22 @@ static void dcn35_set_idle_state(struct clk_mgr *clk_mgr_base, bool allow_idle) if (dc->config.disable_ips == DMUB_IPS_ENABLE || dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) { - val |= DMUB_IPS1_ALLOW_MASK; - val |= DMUB_IPS2_ALLOW_MASK; - } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) { val = val & ~DMUB_IPS1_ALLOW_MASK; val = val & ~DMUB_IPS2_ALLOW_MASK; - } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) { - val |= DMUB_IPS1_ALLOW_MASK; - val = val & ~DMUB_IPS2_ALLOW_MASK; - } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) { + } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) { val |= DMUB_IPS1_ALLOW_MASK; val |= DMUB_IPS2_ALLOW_MASK; + } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) { + val = val & ~DMUB_IPS1_ALLOW_MASK; + val |= DMUB_IPS2_ALLOW_MASK; + } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) { + val = val & ~DMUB_IPS1_ALLOW_MASK; + val = val & ~DMUB_IPS2_ALLOW_MASK; } if (!allow_idle) { - val = val & ~DMUB_IPS1_ALLOW_MASK; - val = val & ~DMUB_IPS2_ALLOW_MASK; + val |= DMUB_IPS1_ALLOW_MASK; + val |= DMUB_IPS2_ALLOW_MASK; } dcn35_smu_write_ips_scratch(clk_mgr, val); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7b9bf5cb45299974757bd16d608e41d444ef3b0f..76b47f17812797b95f0bf3fe5ef5ed966cbcacc3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3178,7 +3178,7 @@ static bool update_planes_and_stream_state(struct dc *dc, struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(&context->res_ctx, context->streams[i]); - if (otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) + if (otg_master && otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) resource_build_test_pattern_params(&context->res_ctx, otg_master); } } @@ -4934,8 +4934,8 @@ bool dc_dmub_is_ips_idle_state(struct dc *dc) if (dc->hwss.get_idle_state) idle_state = dc->hwss.get_idle_state(dc); - if ((idle_state & DMUB_IPS1_ALLOW_MASK) || - (idle_state & DMUB_IPS2_ALLOW_MASK)) + if (!(idle_state & DMUB_IPS1_ALLOW_MASK) || + !(idle_state & DMUB_IPS2_ALLOW_MASK)) return true; return false; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1d48278cba96c587be346e32c4ce46bfe6aaf85e..a1f1d100399275aba741c7dcd852c30fbcac27cf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -5190,6 +5190,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy( sec_next = sec_pipe->next_odm_pipe; sec_prev = sec_pipe->prev_odm_pipe; + if (pri_pipe == NULL) + return false; + *sec_pipe = *pri_pipe; sec_pipe->top_pipe = sec_top; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index e4c007203318bf0b9ad3f3f9b1d2fd73973ca4f9..0e07699c1e83529acfbf48ba2f36bea06a0cd884 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1202,11 +1202,11 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) allow_state = dc->hwss.get_idle_state(dc); dc->hwss.set_idle_state(dc, false); - if (allow_state & DMUB_IPS2_ALLOW_MASK) { + if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) { // Wait for evaluation time udelay(dc->debug.ips2_eval_delay_us); commit_state = dc->hwss.get_idle_state(dc); - if (commit_state & DMUB_IPS2_COMMIT_MASK) { + if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) { // Tell PMFW to exit low power state dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); @@ -1216,7 +1216,7 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) for (i = 0; i < max_num_polls; ++i) { commit_state = dc->hwss.get_idle_state(dc); - if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) + if (commit_state & DMUB_IPS2_COMMIT_MASK) break; udelay(1); @@ -1235,10 +1235,10 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) } dc_dmub_srv_notify_idle(dc, false); - if (allow_state & DMUB_IPS1_ALLOW_MASK) { + if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) { for (i = 0; i < max_num_polls; ++i) { commit_state = dc->hwss.get_idle_state(dc); - if (!(commit_state & DMUB_IPS1_COMMIT_MASK)) + if (commit_state & DMUB_IPS1_COMMIT_MASK) break; udelay(1); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index cea666ea66c6144cad038aa9a8d833b2d36b0a78..fcb825e4f1bb8f8dfba4107bc3bb9c642aa726f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -177,6 +177,7 @@ struct dc_panel_patch { unsigned int disable_fams; unsigned int skip_avmute; unsigned int mst_start_top_delay; + unsigned int remove_sink_ext_caps; }; struct dc_edid_caps { diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c index 001f9eb66920751dbbe992d450579b9b4790b370..62a8f0b56006201d6b8db01a0641d5a355887cbb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c @@ -261,12 +261,6 @@ static void enc35_stream_encoder_enable( /* invalid mode ! */ ASSERT_CRITICAL(false); } - - REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1); - REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1); - } else { - REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0); - REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0); } } @@ -436,6 +430,8 @@ static void enc35_disable_fifo(struct stream_encoder *enc) struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0); + REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0); + REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0); } static void enc35_enable_fifo(struct stream_encoder *enc) @@ -443,6 +439,8 @@ static void enc35_enable_fifo(struct stream_encoder *enc) struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7); + REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1); + REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1); enc35_reset_fifo(enc, true); enc35_reset_fifo(enc, false); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index d6f0f857c05af8d6d9298225b58f10291cd5c92a..f2fe523f914f1179106426684e010cfea1b170ef 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -1088,6 +1088,9 @@ static bool detect_link_and_local_sink(struct dc_link *link, if (sink->edid_caps.panel_patch.skip_scdc_overwrite) link->ctx->dc->debug.hdmi20_disable = true; + if (sink->edid_caps.panel_patch.remove_sink_ext_caps) + link->dpcd_sink_ext_caps.raw = 0; + if (dc_is_hdmi_signal(link->connector_signal)) read_scdc_caps(link->ddc, link->local_sink); diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 9665ada0f894b253619dc57e1919924e8afa7b52..df63aa8f01e98d1e8efe45ba295bf5232f326f5d 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -195,6 +195,7 @@ struct dmub_srv_region_params { uint32_t vbios_size; const uint8_t *fw_inst_const; const uint8_t *fw_bss_data; + bool is_mailbox_in_inbox; }; /** @@ -214,20 +215,25 @@ struct dmub_srv_region_params { */ struct dmub_srv_region_info { uint32_t fb_size; + uint32_t inbox_size; uint8_t num_regions; struct dmub_region regions[DMUB_WINDOW_TOTAL]; }; /** - * struct dmub_srv_fb_params - parameters used for driver fb setup + * struct dmub_srv_memory_params - parameters used for driver fb setup * @region_info: region info calculated by dmub service - * @cpu_addr: base cpu address for the framebuffer - * @gpu_addr: base gpu virtual address for the framebuffer + * @cpu_fb_addr: base cpu address for the framebuffer + * @cpu_inbox_addr: base cpu address for the gart + * @gpu_fb_addr: base gpu virtual address for the framebuffer + * @gpu_inbox_addr: base gpu virtual address for the gart */ -struct dmub_srv_fb_params { +struct dmub_srv_memory_params { const struct dmub_srv_region_info *region_info; - void *cpu_addr; - uint64_t gpu_addr; + void *cpu_fb_addr; + void *cpu_inbox_addr; + uint64_t gpu_fb_addr; + uint64_t gpu_inbox_addr; }; /** @@ -563,8 +569,8 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, * DMUB_STATUS_OK - success * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub, - const struct dmub_srv_fb_params *params, +enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub, + const struct dmub_srv_memory_params *params, struct dmub_srv_fb_info *out); /** diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index e43e8d4bfe375e93f9e75d285e57bb41cffc44de..22fc4ba96defd1c0abd6e031d8a4f8cc07f7d4a9 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -434,7 +434,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, uint32_t fw_state_size = DMUB_FW_STATE_SIZE; uint32_t trace_buffer_size = DMUB_TRACE_BUFFER_SIZE; uint32_t scratch_mem_size = DMUB_SCRATCH_MEM_SIZE; - + uint32_t previous_top = 0; if (!dmub->sw_init) return DMUB_STATUS_INVALID; @@ -459,8 +459,15 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, bios->base = dmub_align(stack->top, 256); bios->top = bios->base + params->vbios_size; - mail->base = dmub_align(bios->top, 256); - mail->top = mail->base + DMUB_MAILBOX_SIZE; + if (params->is_mailbox_in_inbox) { + mail->base = 0; + mail->top = mail->base + DMUB_MAILBOX_SIZE; + previous_top = bios->top; + } else { + mail->base = dmub_align(bios->top, 256); + mail->top = mail->base + DMUB_MAILBOX_SIZE; + previous_top = mail->top; + } fw_info = dmub_get_fw_meta_info(params); @@ -479,7 +486,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, dmub->fw_version = fw_info->fw_version; } - trace_buff->base = dmub_align(mail->top, 256); + trace_buff->base = dmub_align(previous_top, 256); trace_buff->top = trace_buff->base + dmub_align(trace_buffer_size, 64); fw_state->base = dmub_align(trace_buff->top, 256); @@ -490,11 +497,14 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, out->fb_size = dmub_align(scratch_mem->top, 4096); + if (params->is_mailbox_in_inbox) + out->inbox_size = dmub_align(mail->top, 4096); + return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub, - const struct dmub_srv_fb_params *params, +enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub, + const struct dmub_srv_memory_params *params, struct dmub_srv_fb_info *out) { uint8_t *cpu_base; @@ -509,8 +519,8 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub, if (params->region_info->num_regions != DMUB_NUM_WINDOWS) return DMUB_STATUS_INVALID; - cpu_base = (uint8_t *)params->cpu_addr; - gpu_base = params->gpu_addr; + cpu_base = (uint8_t *)params->cpu_fb_addr; + gpu_base = params->gpu_fb_addr; for (i = 0; i < DMUB_NUM_WINDOWS; ++i) { const struct dmub_region *reg = @@ -518,6 +528,12 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub, out->fb[i].cpu_addr = cpu_base + reg->base; out->fb[i].gpu_addr = gpu_base + reg->base; + + if (i == DMUB_WINDOW_4_MAILBOX && params->cpu_inbox_addr != 0) { + out->fb[i].cpu_addr = (uint8_t *)params->cpu_inbox_addr + reg->base; + out->fb[i].gpu_addr = params->gpu_inbox_addr + reg->base; + } + out->fb[i].size = reg->top - reg->base; } @@ -707,9 +723,16 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) return DMUB_STATUS_INVALID; if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { - dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - dmub->inbox1_rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub); - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; + uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); + + if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) { + return DMUB_STATUS_HW_FAILURE; + } else { + dmub->inbox1_rb.rptr = rptr; + dmub->inbox1_rb.wrpt = wptr; + dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; + } } return DMUB_STATUS_OK; @@ -743,6 +766,11 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, if (!dmub->hw_init) return DMUB_STATUS_INVALID; + if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || + dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { + return DMUB_STATUS_HW_FAILURE; + } + if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) return DMUB_STATUS_OK; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index dab35d878a905ce50ae013d6f4e9a750599183ce..fef2d290f3f2526b2a196649e7e74a0df154b6ec 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -123,7 +123,7 @@ typedef enum { VOLTAGE_GUARDBAND_COUNT } GFX_GUARDBAND_e; -#define SMU_METRICS_TABLE_VERSION 0x8 +#define SMU_METRICS_TABLE_VERSION 0x9 typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; @@ -211,6 +211,14 @@ typedef struct __attribute__((packed, aligned(4))) { //XGMI Data tranfser size uint64_t XgmiReadDataSizeAcc[8];//in KByte uint64_t XgmiWriteDataSizeAcc[8];//in KByte + + //PCIE BW Data and error count + uint32_t PcieBandwidth[4]; + uint32_t PCIeL0ToRecoveryCountAcc; // The Pcie counter itself is accumulated + uint32_t PCIenReplayAAcc; // The Pcie counter itself is accumulated + uint32_t PCIenReplayARolloverCountAcc; // The Pcie counter itself is accumulated + uint32_t PCIeNAKSentCountAcc; // The Pcie counter itself is accumulated + uint32_t PCIeNAKReceivedCountAcc; // The Pcie counter itself is accumulated } MetricsTable_t; #define SMU_VF_METRICS_TABLE_VERSION 0x3 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 891605d4975f4e4460f83cb153f887a54cc16146..0e5a77c3c2e216362b2e5363f1ec25f62940ede3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1454,7 +1454,7 @@ static int smu_v13_0_6_register_irq_handler(struct smu_context *smu) static int smu_v13_0_6_notify_unload(struct smu_context *smu) { - if (smu->smc_fw_version <= 0x553500) + if (amdgpu_in_reset(smu->adev)) return 0; dev_dbg(smu->adev->dev, "Notify PMFW about driver unload"); @@ -2095,6 +2095,14 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table smu_v13_0_6_get_current_pcie_link_speed(smu); gpu_metrics->pcie_bandwidth_acc = SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]); + gpu_metrics->pcie_bandwidth_inst = + SMUQ10_ROUND(metrics->PcieBandwidth[0]); + gpu_metrics->pcie_l0_to_recov_count_acc = + metrics->PCIeL0ToRecoveryCountAcc; + gpu_metrics->pcie_replay_count_acc = + metrics->PCIenReplayAAcc; + gpu_metrics->pcie_replay_rover_count_acc = + metrics->PCIenReplayARolloverCountAcc; } gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); diff --git a/drivers/gpu/drm/ci/xfails/requirements.txt b/drivers/gpu/drm/ci/xfails/requirements.txt index d8856d1581fdb3507c0ff8ff3ea8a1ae5be92862..e9994c9db799bfb9a555edbaea2d84103db53e79 100644 --- a/drivers/gpu/drm/ci/xfails/requirements.txt +++ b/drivers/gpu/drm/ci/xfails/requirements.txt @@ -5,7 +5,7 @@ termcolor==2.3.0 certifi==2023.7.22 charset-normalizer==3.2.0 idna==3.4 -pip==23.2.1 +pip==23.3 python-gitlab==3.15.0 requests==2.31.0 requests-toolbelt==1.0.0 @@ -13,5 +13,5 @@ ruamel.yaml==0.17.32 ruamel.yaml.clib==0.2.7 setuptools==68.0.0 tenacity==8.2.3 -urllib3==2.0.4 -wheel==0.41.1 \ No newline at end of file +urllib3==2.0.7 +wheel==0.41.1 diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index d5c15292ae93781335a8e8a441be94f9bb235f04..3d92f66e550c3885d1639c9450487228758afd76 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -336,6 +336,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Legion Go 8APU1 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Legion Go 8APU1"), + }, + .driver_data = (void *)&lcd1600x2560_leftside_up, }, { /* Lenovo Yoga Book X90F / X90L */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h index 82b267c111470af7a09726d2dab5d9aa632d3b7c..460459af272d6ffa4fe8f9a8770d72220751a78c 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h @@ -14,7 +14,7 @@ struct nvkm_event { int index_nr; spinlock_t refs_lock; - spinlock_t list_lock; + rwlock_t list_lock; int *refs; struct list_head ntfy; @@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev, int types_nr, int index_nr, struct nvkm_event *event) { spin_lock_init(&event->refs_lock); - spin_lock_init(&event->list_lock); + rwlock_init(&event->list_lock); return __nvkm_event_init(func, subdev, types_nr, index_nr, event); } diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index d8c92521226d97c582e4b6f64c69fc90b469762a..f28f9a857458682f80f0a8687ed2edd92fa921d9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -726,6 +726,11 @@ nouveau_display_create(struct drm_device *dev) if (nouveau_modeset != 2) { ret = nvif_disp_ctor(&drm->client.device, "kmsDisp", 0, &disp->disp); + /* no display hw */ + if (ret == -ENODEV) { + ret = 0; + goto disp_create_err; + } if (!ret && (disp->disp.outp_mask || drm->vbios.dcb.entries)) { nouveau_display_create_properties(dev); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/event.c b/drivers/gpu/drm/nouveau/nvkm/core/event.c index a6c877135598f7cd7447bfc50465b51e2d0fd4fc..61fed7792e415cb72d9c75664d9a9e19af7539ef 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/event.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/event.c @@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy) static void nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy) { - spin_lock_irq(&ntfy->event->list_lock); + write_lock_irq(&ntfy->event->list_lock); list_del_init(&ntfy->head); - spin_unlock_irq(&ntfy->event->list_lock); + write_unlock_irq(&ntfy->event->list_lock); } static void nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy) { - spin_lock_irq(&ntfy->event->list_lock); + write_lock_irq(&ntfy->event->list_lock); list_add_tail(&ntfy->head, &ntfy->event->ntfy); - spin_unlock_irq(&ntfy->event->list_lock); + write_unlock_irq(&ntfy->event->list_lock); } static void @@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits) return; nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id); - spin_lock_irqsave(&event->list_lock, flags); + read_lock_irqsave(&event->list_lock, flags); list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) { if (ntfy->id == id && ntfy->bits & bits) { @@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits) } } - spin_unlock_irqrestore(&event->list_lock, flags); + read_unlock_irqrestore(&event->list_lock, flags); } void diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index e31f9641114b7127bdfe92e4e4df680ac16a9aee..dc44f5c7833f60c165d5c01a0b2afd03e3e45294 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -689,8 +689,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc) struct nvfw_gsp_rpc *rpc; rpc = r535_gsp_cmdq_get(gsp, ALIGN(sizeof(*rpc) + argc, sizeof(u64))); - if (!rpc) - return NULL; + if (IS_ERR(rpc)) + return ERR_CAST(rpc); rpc->header_version = 0x03000000; rpc->signature = ('C' << 24) | ('P' << 16) | ('R' << 8) | 'V'; @@ -1159,7 +1159,7 @@ static void r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode, MUX_METHOD_DATA_ELEMENT *part) { - acpi_handle iter = NULL, handle_mux; + acpi_handle iter = NULL, handle_mux = NULL; acpi_status status; unsigned long long value; diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index affcfb243f0f52a5848a48016eac8e78ef709f7e..35f762872b8a58c2f7e8fd4867bb0e139aea5cf0 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val) { struct dw_i2c_dev *dev = context; - *val = readl_relaxed(dev->base + reg); + *val = readl(dev->base + reg); return 0; } @@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val) { struct dw_i2c_dev *dev = context; - writel_relaxed(val, dev->base + reg); + writel(val, dev->base + reg); return 0; } @@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val) { struct dw_i2c_dev *dev = context; - *val = swab32(readl_relaxed(dev->base + reg)); + *val = swab32(readl(dev->base + reg)); return 0; } @@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val) { struct dw_i2c_dev *dev = context; - writel_relaxed(swab32(val), dev->base + reg); + writel(swab32(val), dev->base + reg); return 0; } @@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val) { struct dw_i2c_dev *dev = context; - *val = readw_relaxed(dev->base + reg) | - (readw_relaxed(dev->base + reg + 2) << 16); + *val = readw(dev->base + reg) | + (readw(dev->base + reg + 2) << 16); return 0; } @@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val) { struct dw_i2c_dev *dev = context; - writew_relaxed(val, dev->base + reg); - writew_relaxed(val >> 16, dev->base + reg + 2); + writew(val, dev->base + reg); + writew(val >> 16, dev->base + reg + 2); return 0; } diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index 041a76f71a49cc5db369dcb4bc1340ca4c04d154..e106af83cef4da5626e534325805aad70be2f617 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -771,8 +771,8 @@ static int ocores_i2c_resume(struct device *dev) return ocores_init(dev, i2c); } -static DEFINE_SIMPLE_DEV_PM_OPS(ocores_i2c_pm, - ocores_i2c_suspend, ocores_i2c_resume); +static DEFINE_NOIRQ_DEV_PM_OPS(ocores_i2c_pm, + ocores_i2c_suspend, ocores_i2c_resume); static struct platform_driver ocores_i2c_driver = { .probe = ocores_i2c_probe, diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 1d76482427492113886af4ae84943092106e8479..76f79b68cef84548b86def688b6ba95f4aa46335 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -265,6 +265,9 @@ struct pxa_i2c { u32 hs_mask; struct i2c_bus_recovery_info recovery; + struct pinctrl *pinctrl; + struct pinctrl_state *pinctrl_default; + struct pinctrl_state *pinctrl_recovery; }; #define _IBMR(i2c) ((i2c)->reg_ibmr) @@ -1299,12 +1302,13 @@ static void i2c_pxa_prepare_recovery(struct i2c_adapter *adap) */ gpiod_set_value(i2c->recovery.scl_gpiod, ibmr & IBMR_SCLS); gpiod_set_value(i2c->recovery.sda_gpiod, ibmr & IBMR_SDAS); + + WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery)); } static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap) { struct pxa_i2c *i2c = adap->algo_data; - struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; u32 isr; /* @@ -1318,7 +1322,7 @@ static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap) i2c_pxa_do_reset(i2c); } - WARN_ON(pinctrl_select_state(bri->pinctrl, bri->pins_default)); + WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default)); dev_dbg(&i2c->adap.dev, "recovery: IBMR 0x%08x ISR 0x%08x\n", readl(_IBMR(i2c)), readl(_ISR(i2c))); @@ -1340,20 +1344,76 @@ static int i2c_pxa_init_recovery(struct pxa_i2c *i2c) if (IS_ENABLED(CONFIG_I2C_PXA_SLAVE)) return 0; - bri->pinctrl = devm_pinctrl_get(dev); - if (PTR_ERR(bri->pinctrl) == -ENODEV) { - bri->pinctrl = NULL; + i2c->pinctrl = devm_pinctrl_get(dev); + if (PTR_ERR(i2c->pinctrl) == -ENODEV) + i2c->pinctrl = NULL; + if (IS_ERR(i2c->pinctrl)) + return PTR_ERR(i2c->pinctrl); + + if (!i2c->pinctrl) + return 0; + + i2c->pinctrl_default = pinctrl_lookup_state(i2c->pinctrl, + PINCTRL_STATE_DEFAULT); + i2c->pinctrl_recovery = pinctrl_lookup_state(i2c->pinctrl, "recovery"); + + if (IS_ERR(i2c->pinctrl_default) || IS_ERR(i2c->pinctrl_recovery)) { + dev_info(dev, "missing pinmux recovery information: %ld %ld\n", + PTR_ERR(i2c->pinctrl_default), + PTR_ERR(i2c->pinctrl_recovery)); + return 0; + } + + /* + * Claiming GPIOs can influence the pinmux state, and may glitch the + * I2C bus. Do this carefully. + */ + bri->scl_gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN); + if (bri->scl_gpiod == ERR_PTR(-EPROBE_DEFER)) + return -EPROBE_DEFER; + if (IS_ERR(bri->scl_gpiod)) { + dev_info(dev, "missing scl gpio recovery information: %pe\n", + bri->scl_gpiod); + return 0; + } + + /* + * We have SCL. Pull SCL low and wait a bit so that SDA glitches + * have no effect. + */ + gpiod_direction_output(bri->scl_gpiod, 0); + udelay(10); + bri->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_OUT_HIGH_OPEN_DRAIN); + + /* Wait a bit in case of a SDA glitch, and then release SCL. */ + udelay(10); + gpiod_direction_output(bri->scl_gpiod, 1); + + if (bri->sda_gpiod == ERR_PTR(-EPROBE_DEFER)) + return -EPROBE_DEFER; + + if (IS_ERR(bri->sda_gpiod)) { + dev_info(dev, "missing sda gpio recovery information: %pe\n", + bri->sda_gpiod); return 0; } - if (IS_ERR(bri->pinctrl)) - return PTR_ERR(bri->pinctrl); bri->prepare_recovery = i2c_pxa_prepare_recovery; bri->unprepare_recovery = i2c_pxa_unprepare_recovery; + bri->recover_bus = i2c_generic_scl_recovery; i2c->adap.bus_recovery_info = bri; - return 0; + /* + * Claiming GPIOs can change the pinmux state, which confuses the + * pinctrl since pinctrl's idea of the current setting is unaffected + * by the pinmux change caused by claiming the GPIO. Work around that + * by switching pinctrl to the GPIO state here. We do it this way to + * avoid glitching the I2C bus. + */ + pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery); + + return pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default); } static int i2c_pxa_probe(struct platform_device *dev) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index a8c89df1a997866c31238d94bb5cd26c5acf8dcb..9a7a74239eabb7cd3d2a3a077316d54833ec18cb 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2379,12 +2379,12 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, break; } + if (!shr) + gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order)); + its_write_baser(its, baser, val); tmp = baser->val; - if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE) - tmp &= ~GITS_BASER_SHAREABILITY_MASK; - if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) { /* * Shareability didn't stick. Just use @@ -2394,10 +2394,9 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, * non-cacheable as well. */ shr = tmp & GITS_BASER_SHAREABILITY_MASK; - if (!shr) { + if (!shr) cache = GITS_BASER_nC; - gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order)); - } + goto retry_baser; } @@ -2609,6 +2608,11 @@ static int its_alloc_tables(struct its_node *its) /* erratum 24313: ignore memory access type */ cache = GITS_BASER_nCnB; + if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE) { + cache = GITS_BASER_nC; + shr = 0; + } + for (i = 0; i < GITS_BASER_NR_REGS; i++) { struct its_baser *baser = its->tables + i; u64 val = its_read_baser(its, baser); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 62eb27639c9b855c006ad7e13234403ba78b28a3..f03d7dba270c52f1313af3ee079cd977a207b34d 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -254,7 +254,7 @@ enum evict_result { typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context); -static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context) +static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep) { unsigned long tested = 0; struct list_head *h = lru->cursor; @@ -295,7 +295,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con h = h->next; - cond_resched(); + if (!no_sleep) + cond_resched(); } return NULL; @@ -382,7 +383,10 @@ struct dm_buffer { */ struct buffer_tree { - struct rw_semaphore lock; + union { + struct rw_semaphore lock; + rwlock_t spinlock; + } u; struct rb_root root; } ____cacheline_aligned_in_smp; @@ -393,9 +397,12 @@ struct dm_buffer_cache { * on the locks. */ unsigned int num_locks; + bool no_sleep; struct buffer_tree trees[]; }; +static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled); + static inline unsigned int cache_index(sector_t block, unsigned int num_locks) { return dm_hash_locks_index(block, num_locks); @@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks) static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block) { - down_read(&bc->trees[cache_index(block, bc->num_locks)].lock); + if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) + read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + else + down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock); } static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block) { - up_read(&bc->trees[cache_index(block, bc->num_locks)].lock); + if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) + read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + else + up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock); } static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block) { - down_write(&bc->trees[cache_index(block, bc->num_locks)].lock); + if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) + write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + else + down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock); } static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block) { - up_write(&bc->trees[cache_index(block, bc->num_locks)].lock); + if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) + write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + else + up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock); } /* @@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool static void __lh_lock(struct lock_history *lh, unsigned int index) { - if (lh->write) - down_write(&lh->cache->trees[index].lock); - else - down_read(&lh->cache->trees[index].lock); + if (lh->write) { + if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep) + write_lock_bh(&lh->cache->trees[index].u.spinlock); + else + down_write(&lh->cache->trees[index].u.lock); + } else { + if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep) + read_lock_bh(&lh->cache->trees[index].u.spinlock); + else + down_read(&lh->cache->trees[index].u.lock); + } } static void __lh_unlock(struct lock_history *lh, unsigned int index) { - if (lh->write) - up_write(&lh->cache->trees[index].lock); - else - up_read(&lh->cache->trees[index].lock); + if (lh->write) { + if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep) + write_unlock_bh(&lh->cache->trees[index].u.spinlock); + else + up_write(&lh->cache->trees[index].u.lock); + } else { + if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep) + read_unlock_bh(&lh->cache->trees[index].u.spinlock); + else + up_read(&lh->cache->trees[index].u.lock); + } } /* @@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l) return le_to_buffer(le); } -static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks) +static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep) { unsigned int i; bc->num_locks = num_locks; + bc->no_sleep = no_sleep; for (i = 0; i < bc->num_locks; i++) { - init_rwsem(&bc->trees[i].lock); + if (no_sleep) + rwlock_init(&bc->trees[i].u.spinlock); + else + init_rwsem(&bc->trees[i].u.lock); bc->trees[i].root = RB_ROOT; } @@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode struct lru_entry *le; struct dm_buffer *b; - le = lru_evict(&bc->lru[list_mode], __evict_pred, &w); + le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep); if (!le) return NULL; @@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_ struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context}; while (true) { - le = lru_evict(&bc->lru[old_mode], __evict_pred, &w); + le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep); if (!le) break; @@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc, { unsigned int i; + BUG_ON(bc->no_sleep); for (i = 0; i < bc->num_locks; i++) { - down_write(&bc->trees[i].lock); + down_write(&bc->trees[i].u.lock); __remove_range(bc, &bc->trees[i].root, begin, end, pred, release); - up_write(&bc->trees[i].lock); + up_write(&bc->trees[i].u.lock); } } @@ -979,8 +1017,6 @@ struct dm_bufio_client { struct dm_buffer_cache cache; /* must be last member */ }; -static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled); - /*----------------------------------------------------------------*/ #define dm_bufio_in_request() (!!current->bio_list) @@ -1871,7 +1907,8 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, if (need_submit) submit_io(b, REQ_OP_READ, read_endio); - wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); + if (nf != NF_GET) /* we already tested this condition above */ + wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); if (b->read_error) { int error = blk_status_to_errno(b->read_error); @@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign r = -ENOMEM; goto bad_client; } - cache_init(&c->cache, num_locks); + cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0); c->bdev = bdev; c->block_size = block_size; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6de107aff331947910dc3478510a9c4fa05e341d..2ae8560b6a14ad9e6b2313fa17d1bb98a209af5a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size) unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; unsigned int remaining_size; - unsigned int order = MAX_ORDER - 1; + unsigned int order = MAX_ORDER; retry: if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index efd510984e25937e2ed5d25d6feb9187edfa62bd..5eabdb06c6498b103f24e7316e3de2587f6bca45 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -33,7 +33,7 @@ struct delay_c { struct work_struct flush_expired_bios; struct list_head delayed_bios; struct task_struct *worker; - atomic_t may_delay; + bool may_delay; struct delay_class read; struct delay_class write; @@ -73,39 +73,6 @@ static inline bool delay_is_fast(struct delay_c *dc) return !!dc->worker; } -static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all) -{ - struct dm_delay_info *delayed, *next; - - mutex_lock(&delayed_bios_lock); - list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { - if (flush_all || time_after_eq(jiffies, delayed->expires)) { - struct bio *bio = dm_bio_from_per_bio_data(delayed, - sizeof(struct dm_delay_info)); - list_del(&delayed->list); - dm_submit_bio_remap(bio, NULL); - delayed->class->ops--; - } - } - mutex_unlock(&delayed_bios_lock); -} - -static int flush_worker_fn(void *data) -{ - struct delay_c *dc = data; - - while (1) { - flush_delayed_bios_fast(dc, false); - if (unlikely(list_empty(&dc->delayed_bios))) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - } else - cond_resched(); - } - - return 0; -} - static void flush_bios(struct bio *bio) { struct bio *n; @@ -118,36 +85,61 @@ static void flush_bios(struct bio *bio) } } -static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all) +static void flush_delayed_bios(struct delay_c *dc, bool flush_all) { struct dm_delay_info *delayed, *next; + struct bio_list flush_bio_list; unsigned long next_expires = 0; - unsigned long start_timer = 0; - struct bio_list flush_bios = { }; + bool start_timer = false; + bio_list_init(&flush_bio_list); mutex_lock(&delayed_bios_lock); list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { + cond_resched(); if (flush_all || time_after_eq(jiffies, delayed->expires)) { struct bio *bio = dm_bio_from_per_bio_data(delayed, sizeof(struct dm_delay_info)); list_del(&delayed->list); - bio_list_add(&flush_bios, bio); + bio_list_add(&flush_bio_list, bio); delayed->class->ops--; continue; } - if (!start_timer) { - start_timer = 1; - next_expires = delayed->expires; - } else - next_expires = min(next_expires, delayed->expires); + if (!delay_is_fast(dc)) { + if (!start_timer) { + start_timer = true; + next_expires = delayed->expires; + } else { + next_expires = min(next_expires, delayed->expires); + } + } } mutex_unlock(&delayed_bios_lock); if (start_timer) queue_timeout(dc, next_expires); - return bio_list_get(&flush_bios); + flush_bios(bio_list_get(&flush_bio_list)); +} + +static int flush_worker_fn(void *data) +{ + struct delay_c *dc = data; + + while (!kthread_should_stop()) { + flush_delayed_bios(dc, false); + mutex_lock(&delayed_bios_lock); + if (unlikely(list_empty(&dc->delayed_bios))) { + set_current_state(TASK_INTERRUPTIBLE); + mutex_unlock(&delayed_bios_lock); + schedule(); + } else { + mutex_unlock(&delayed_bios_lock); + cond_resched(); + } + } + + return 0; } static void flush_expired_bios(struct work_struct *work) @@ -155,10 +147,7 @@ static void flush_expired_bios(struct work_struct *work) struct delay_c *dc; dc = container_of(work, struct delay_c, flush_expired_bios); - if (delay_is_fast(dc)) - flush_delayed_bios_fast(dc, false); - else - flush_bios(flush_delayed_bios(dc, false)); + flush_delayed_bios(dc, false); } static void delay_dtr(struct dm_target *ti) @@ -177,8 +166,7 @@ static void delay_dtr(struct dm_target *ti) if (dc->worker) kthread_stop(dc->worker); - if (!delay_is_fast(dc)) - mutex_destroy(&dc->timer_lock); + mutex_destroy(&dc->timer_lock); kfree(dc); } @@ -236,7 +224,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = dc; INIT_LIST_HEAD(&dc->delayed_bios); - atomic_set(&dc->may_delay, 1); + mutex_init(&dc->timer_lock); + dc->may_delay = true; dc->argc = argc; ret = delay_class_ctr(ti, &dc->read, argv); @@ -282,12 +271,12 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) "dm-delay-flush-worker"); if (IS_ERR(dc->worker)) { ret = PTR_ERR(dc->worker); + dc->worker = NULL; goto bad; } } else { timer_setup(&dc->delay_timer, handle_delayed_timer, 0); INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); - mutex_init(&dc->timer_lock); dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); if (!dc->kdelayd_wq) { ret = -EINVAL; @@ -312,7 +301,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) struct dm_delay_info *delayed; unsigned long expires = 0; - if (!c->delay || !atomic_read(&dc->may_delay)) + if (!c->delay) return DM_MAPIO_REMAPPED; delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); @@ -321,6 +310,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); mutex_lock(&delayed_bios_lock); + if (unlikely(!dc->may_delay)) { + mutex_unlock(&delayed_bios_lock); + return DM_MAPIO_REMAPPED; + } c->ops++; list_add_tail(&delayed->list, &dc->delayed_bios); mutex_unlock(&delayed_bios_lock); @@ -337,21 +330,20 @@ static void delay_presuspend(struct dm_target *ti) { struct delay_c *dc = ti->private; - atomic_set(&dc->may_delay, 0); + mutex_lock(&delayed_bios_lock); + dc->may_delay = false; + mutex_unlock(&delayed_bios_lock); - if (delay_is_fast(dc)) - flush_delayed_bios_fast(dc, true); - else { + if (!delay_is_fast(dc)) del_timer_sync(&dc->delay_timer); - flush_bios(flush_delayed_bios(dc, true)); - } + flush_delayed_bios(dc, true); } static void delay_resume(struct dm_target *ti) { struct delay_c *dc = ti->private; - atomic_set(&dc->may_delay, 1); + dc->may_delay = true; } static int delay_map(struct dm_target *ti, struct bio *bio) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 3ef9f018da60ce7f96dc932b028bc03bc0f5eca2..2099c755119e37a6e7c79a78f74c198559ee4c01 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -185,7 +185,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, { if (unlikely(verity_hash(v, verity_io_hash_req(v, io), data, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io)))) + verity_io_real_digest(v, io), true))) return 0; return memcmp(verity_io_real_digest(v, io), want_digest, @@ -386,7 +386,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, /* Always re-validate the corrected block against the expected hash */ r = verity_hash(v, verity_io_hash_req(v, io), fio->output, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io)); + verity_io_real_digest(v, io), true); if (unlikely(r < 0)) return r; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 26adcfea030229b9b2b6a91667b56c0c69646d3a..e115fcfe723c989bd8fb512ca0bcacb0208fcd0a 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -135,20 +135,21 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req, * Wrapper for crypto_ahash_init, which handles verity salting. */ static int verity_hash_init(struct dm_verity *v, struct ahash_request *req, - struct crypto_wait *wait) + struct crypto_wait *wait, bool may_sleep) { int r; ahash_request_set_tfm(req, v->tfm); - ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, (void *)wait); + ahash_request_set_callback(req, + may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0, + crypto_req_done, (void *)wait); crypto_init_wait(wait); r = crypto_wait_req(crypto_ahash_init(req), wait); if (unlikely(r < 0)) { - DMERR("crypto_ahash_init failed: %d", r); + if (r != -ENOMEM) + DMERR("crypto_ahash_init failed: %d", r); return r; } @@ -179,12 +180,12 @@ static int verity_hash_final(struct dm_verity *v, struct ahash_request *req, } int verity_hash(struct dm_verity *v, struct ahash_request *req, - const u8 *data, size_t len, u8 *digest) + const u8 *data, size_t len, u8 *digest, bool may_sleep) { int r; struct crypto_wait wait; - r = verity_hash_init(v, req, &wait); + r = verity_hash_init(v, req, &wait, may_sleep); if (unlikely(r < 0)) goto out; @@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, r = verity_hash(v, verity_io_hash_req(v, io), data, 1 << v->hash_dev_block_bits, - verity_io_real_digest(v, io)); + verity_io_real_digest(v, io), !io->in_tasklet); if (unlikely(r < 0)) goto release_ret_r; @@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io) continue; } - r = verity_hash_init(v, req, &wait); + r = verity_hash_init(v, req, &wait, !io->in_tasklet); if (unlikely(r < 0)) return r; @@ -652,7 +653,7 @@ static void verity_tasklet(unsigned long data) io->in_tasklet = true; err = verity_verify_io(io); - if (err == -EAGAIN) { + if (err == -EAGAIN || err == -ENOMEM) { /* fallback to retrying with work-queue */ INIT_WORK(&io->work, verity_work); queue_work(io->v->verify_wq, &io->work); @@ -1033,7 +1034,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v) goto out; r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits, - v->zero_digest); + v->zero_digest, true); out: kfree(req); diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 2f555b4203679a454fad28323a22c3741108fc34..f96f4e281ee4ac5bfacb73c4b5c012dfae7d0b08 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -128,7 +128,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len)); extern int verity_hash(struct dm_verity *v, struct ahash_request *req, - const u8 *data, size_t len, u8 *digest); + const u8 *data, size_t len, u8 *digest, bool may_sleep); extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, sector_t block, u8 *digest, bool *is_zero); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 51d47eda1c873debda6da094377bcb3367a78f6e..8e6cc0e133b7f19afccd3ecf44bea5ceacb393b1 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1500,6 +1500,10 @@ static void bond_compute_features(struct bonding *bond) static void bond_setup_by_slave(struct net_device *bond_dev, struct net_device *slave_dev) { + bool was_up = !!(bond_dev->flags & IFF_UP); + + dev_close(bond_dev); + bond_dev->header_ops = slave_dev->header_ops; bond_dev->type = slave_dev->type; @@ -1514,6 +1518,8 @@ static void bond_setup_by_slave(struct net_device *bond_dev, bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST); bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP); } + if (was_up) + dev_open(bond_dev, NULL); } /* On bonding slaves other than the currently active slave, suppress diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 045fe133f6ee9957479c67b72b61c8887fbc0afe..5beadabc213618314ad42da120da259415eaa7b3 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -146,7 +146,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) } queue_work(pdsc->wq, &qcq->work); - pds_core_intr_mask(&pdsc->intr_ctrl[irq], PDS_CORE_INTR_MASK_CLEAR); + pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index f3a7deda997245bd3c80070889981f35c01dcd28..e35d3e7006bfc1891a0343643910b915f31ba56a 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -15,7 +15,7 @@ #define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver" #define PDSC_WATCHDOG_SECS 5 -#define PDSC_QUEUE_NAME_MAX_SZ 32 +#define PDSC_QUEUE_NAME_MAX_SZ 16 #define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */ #define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */ #define PDSC_TEARDOWN_RECOVERY false diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 7c1b965d61a926df45a88e8c941f0806f12923ad..31940b857e0e501d2d4d220a0ed6a0cfd03098c7 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -261,10 +261,14 @@ static int pdsc_identify(struct pdsc *pdsc) struct pds_core_drv_identity drv = {}; size_t sz; int err; + int n; drv.drv_type = cpu_to_le32(PDS_DRIVER_LINUX); - snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str), - "%s %s", PDS_CORE_DRV_NAME, utsname()->release); + /* Catching the return quiets a Wformat-truncation complaint */ + n = snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str), + "%s %s", PDS_CORE_DRV_NAME, utsname()->release); + if (n > sizeof(drv.driver_ver_str)) + dev_dbg(pdsc->dev, "release name truncated, don't care\n"); /* Next let's get some info about the device * We use the devcmd_lock at this level in order to diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index 57f88c8b37defe17fed6e8b9d82578a4f9701dc2..e9948ea5bbcdbaae713390cca46280e55b548956 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -104,7 +104,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, struct pds_core_fw_list_info fw_list; struct pdsc *pdsc = devlink_priv(dl); union pds_core_dev_comp comp; - char buf[16]; + char buf[32]; int listlen; int err; int i; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1dee27349367e76e9460b4eaeffdd42eb1946b42..48b6191efa56c70cc82ba0a83bb5ee64e6bb3fe2 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6889,7 +6889,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) desc_idx, *post_ptr); drop_it_no_recycle: /* Other statistics kept track of by card. */ - tp->rx_dropped++; + tnapi->rx_dropped++; goto next_pkt; } @@ -7918,8 +7918,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, segs = skb_gso_segment(skb, tp->dev->features & ~(NETIF_F_TSO | NETIF_F_TSO6)); - if (IS_ERR(segs) || !segs) + if (IS_ERR(segs) || !segs) { + tnapi->tx_dropped++; goto tg3_tso_bug_end; + } skb_list_walk_safe(segs, seg, next) { skb_mark_not_on_list(seg); @@ -8190,7 +8192,7 @@ static netdev_tx_t __tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) drop: dev_kfree_skb_any(skb); drop_nofree: - tp->tx_dropped++; + tnapi->tx_dropped++; return NETDEV_TX_OK; } @@ -9405,7 +9407,7 @@ static void __tg3_set_rx_mode(struct net_device *); /* tp->lock is held. */ static int tg3_halt(struct tg3 *tp, int kind, bool silent) { - int err; + int err, i; tg3_stop_fw(tp); @@ -9426,6 +9428,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent) /* And make sure the next sample is new data */ memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats)); + + for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) { + struct tg3_napi *tnapi = &tp->napi[i]; + + tnapi->rx_dropped = 0; + tnapi->tx_dropped = 0; + } } return err; @@ -11975,6 +11984,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) { struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev; struct tg3_hw_stats *hw_stats = tp->hw_stats; + unsigned long rx_dropped; + unsigned long tx_dropped; + int i; stats->rx_packets = old_stats->rx_packets + get_stat64(&hw_stats->rx_ucast_packets) + @@ -12021,8 +12033,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) stats->rx_missed_errors = old_stats->rx_missed_errors + get_stat64(&hw_stats->rx_discards); - stats->rx_dropped = tp->rx_dropped; - stats->tx_dropped = tp->tx_dropped; + /* Aggregate per-queue counters. The per-queue counters are updated + * by a single writer, race-free. The result computed by this loop + * might not be 100% accurate (counters can be updated in the middle of + * the loop) but the next tg3_get_nstats() will recompute the current + * value so it is acceptable. + * + * Note that these counters wrap around at 4G on 32bit machines. + */ + rx_dropped = (unsigned long)(old_stats->rx_dropped); + tx_dropped = (unsigned long)(old_stats->tx_dropped); + + for (i = 0; i < tp->irq_cnt; i++) { + struct tg3_napi *tnapi = &tp->napi[i]; + + rx_dropped += tnapi->rx_dropped; + tx_dropped += tnapi->tx_dropped; + } + + stats->rx_dropped = rx_dropped; + stats->tx_dropped = tx_dropped; } static int tg3_get_regs_len(struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index ae5c01bd111043d0ba76cfa143a2157e02966a4a..5016475e50054d627cfb541ce18d9808450a200f 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -3018,6 +3018,7 @@ struct tg3_napi { u16 *rx_rcb_prod_idx; struct tg3_rx_prodring_set prodring; struct tg3_rx_buffer_desc *rx_rcb; + unsigned long rx_dropped; u32 tx_prod ____cacheline_aligned; u32 tx_cons; @@ -3026,6 +3027,7 @@ struct tg3_napi { u32 prodmbox; struct tg3_tx_buffer_desc *tx_ring; struct tg3_tx_ring_info *tx_buffers; + unsigned long tx_dropped; dma_addr_t status_mapping; dma_addr_t rx_rcb_mapping; @@ -3220,8 +3222,6 @@ struct tg3 { /* begin "everything else" cacheline(s) section */ - unsigned long rx_dropped; - unsigned long tx_dropped; struct rtnl_link_stats64 net_stats_prev; struct tg3_ethtool_stats estats_prev; diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 5423fe26b4ef021f16714258135cd011c09026a1..78287cfcbf6388f01bfab417c264f41f3a1a16f2 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -432,8 +432,8 @@ static const struct gmac_max_framelen gmac_maxlens[] = { .val = CONFIG0_MAXLEN_1536, }, { - .max_l3_len = 1542, - .val = CONFIG0_MAXLEN_1542, + .max_l3_len = 1548, + .val = CONFIG0_MAXLEN_1548, }, { .max_l3_len = 9212, @@ -1145,6 +1145,7 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, dma_addr_t mapping; unsigned short mtu; void *buffer; + int ret; mtu = ETH_HLEN; mtu += netdev->mtu; @@ -1159,9 +1160,30 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, word3 |= mtu; } - if (skb->ip_summed != CHECKSUM_NONE) { + if (skb->len >= ETH_FRAME_LEN) { + /* Hardware offloaded checksumming isn't working on frames + * bigger than 1514 bytes. A hypothesis about this is that the + * checksum buffer is only 1518 bytes, so when the frames get + * bigger they get truncated, or the last few bytes get + * overwritten by the FCS. + * + * Just use software checksumming and bypass on bigger frames. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + ret = skb_checksum_help(skb); + if (ret) + return ret; + } + word1 |= TSS_BYPASS_BIT; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int tcp = 0; + /* We do not switch off the checksumming on non TCP/UDP + * frames: as is shown from tests, the checksumming engine + * is smart enough to see that a frame is not actually TCP + * or UDP and then just pass it through without any changes + * to the frame. + */ if (skb->protocol == htons(ETH_P_IP)) { word1 |= TSS_IP_CHKSUM_BIT; tcp = ip_hdr(skb)->protocol == IPPROTO_TCP; @@ -1978,15 +2000,6 @@ static int gmac_change_mtu(struct net_device *netdev, int new_mtu) return 0; } -static netdev_features_t gmac_fix_features(struct net_device *netdev, - netdev_features_t features) -{ - if (netdev->mtu + ETH_HLEN + VLAN_HLEN > MTU_SIZE_BIT_MASK) - features &= ~GMAC_OFFLOAD_FEATURES; - - return features; -} - static int gmac_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2212,7 +2225,6 @@ static const struct net_device_ops gmac_351x_ops = { .ndo_set_mac_address = gmac_set_mac_address, .ndo_get_stats64 = gmac_get_stats64, .ndo_change_mtu = gmac_change_mtu, - .ndo_fix_features = gmac_fix_features, .ndo_set_features = gmac_set_features, }; @@ -2464,11 +2476,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) netdev->hw_features = GMAC_OFFLOAD_FEATURES; netdev->features |= GMAC_OFFLOAD_FEATURES | NETIF_F_GRO; - /* We can handle jumbo frames up to 10236 bytes so, let's accept - * payloads of 10236 bytes minus VLAN and ethernet header + /* We can receive jumbo frames up to 10236 bytes but only + * transmit 2047 bytes so, let's accept payloads of 2047 + * bytes minus VLAN and ethernet header */ netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = 10236 - VLAN_ETH_HLEN; + netdev->max_mtu = MTU_SIZE_BIT_MASK - VLAN_ETH_HLEN; port->freeq_refill = 0; netif_napi_add(netdev, &port->napi, gmac_napi_poll); diff --git a/drivers/net/ethernet/cortina/gemini.h b/drivers/net/ethernet/cortina/gemini.h index 9fdf77d5eb3740982c28f5758b595aec33dbf692..24bb989981f2339476789d8298d06aa7e0c9087a 100644 --- a/drivers/net/ethernet/cortina/gemini.h +++ b/drivers/net/ethernet/cortina/gemini.h @@ -502,7 +502,7 @@ union gmac_txdesc_3 { #define SOF_BIT 0x80000000 #define EOF_BIT 0x40000000 #define EOFIE_BIT BIT(29) -#define MTU_SIZE_BIT_MASK 0x1fff +#define MTU_SIZE_BIT_MASK 0x7ff /* Max MTU 2047 bytes */ /* GMAC Tx Descriptor */ struct gmac_txdesc { @@ -787,7 +787,7 @@ union gmac_config0 { #define CONFIG0_MAXLEN_1536 0 #define CONFIG0_MAXLEN_1518 1 #define CONFIG0_MAXLEN_1522 2 -#define CONFIG0_MAXLEN_1542 3 +#define CONFIG0_MAXLEN_1548 3 #define CONFIG0_MAXLEN_9k 4 /* 9212 */ #define CONFIG0_MAXLEN_10k 5 /* 10236 */ #define CONFIG0_MAXLEN_1518__6 6 diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 276f996f95dcc8b6ab3c5eb51958b95c19e61dd2..2d42e733837b0d5e98de7d2cdab37b23be503d1c 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -254,10 +254,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) if (block->tx) { if (block->tx->q_num < priv->tx_cfg.num_queues) reschedule |= gve_tx_poll(block, budget); - else + else if (budget) reschedule |= gve_xdp_poll(block, budget); } + if (!budget) + return 0; + if (block->rx) { work_done = gve_rx_poll(block, budget); reschedule |= work_done == budget; @@ -298,6 +301,9 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) if (block->tx) reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + if (!budget) + return 0; + if (block->rx) { work_done = gve_rx_poll_dqo(block, budget); reschedule |= work_done == budget; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index e84a066aa1a40a1f5709852c82da3d212a91f85b..73655347902d2ddedbe896841e7b42b32d0d6a89 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -1007,10 +1007,6 @@ int gve_rx_poll(struct gve_notify_block *block, int budget) feat = block->napi.dev->features; - /* If budget is 0, do all the work */ - if (budget == 0) - budget = INT_MAX; - if (budget > 0) work_done = gve_clean_rx_done(rx, budget, feat); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 6957a865cff37c86a96e4929b986915907781415..9f6ffc4a54f0bb6a46c4f6daee26f429682191e1 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -925,10 +925,6 @@ bool gve_xdp_poll(struct gve_notify_block *block, int budget) bool repoll; u32 to_do; - /* If budget is 0, do all the work */ - if (budget == 0) - budget = INT_MAX; - /* Find out how much work there is to be done */ nic_done = gve_tx_load_event_counter(priv, tx); to_do = min_t(u32, (nic_done - tx->done), budget); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 0b138635bafa9d3a0a17d532346337ffadb2aa5b..c083d1d10767bbfff0c8a58099b5b25dc8a5e7ff 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -503,11 +503,14 @@ static void hns3_get_coal_info(struct hns3_enet_tqp_vector *tqp_vector, } sprintf(result[j++], "%d", i); - sprintf(result[j++], "%s", dim_state_str[dim->state]); + sprintf(result[j++], "%s", dim->state < ARRAY_SIZE(dim_state_str) ? + dim_state_str[dim->state] : "unknown"); sprintf(result[j++], "%u", dim->profile_ix); - sprintf(result[j++], "%s", dim_cqe_mode_str[dim->mode]); + sprintf(result[j++], "%s", dim->mode < ARRAY_SIZE(dim_cqe_mode_str) ? + dim_cqe_mode_str[dim->mode] : "unknown"); sprintf(result[j++], "%s", - dim_tune_stat_str[dim->tune_state]); + dim->tune_state < ARRAY_SIZE(dim_tune_stat_str) ? + dim_tune_stat_str[dim->tune_state] : "unknown"); sprintf(result[j++], "%u", dim->steps_left); sprintf(result[j++], "%u", dim->steps_right); sprintf(result[j++], "%u", dim->tired); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 06117502001f922271f67cc9103f896e9122f2e7..b618797a7e8de2563c5821347ecb441e4f0b7ad5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5139,7 +5139,7 @@ static int hns3_init_mac_addr(struct net_device *netdev) struct hns3_nic_priv *priv = netdev_priv(netdev); char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN]; struct hnae3_handle *h = priv->ae_handle; - u8 mac_addr_temp[ETH_ALEN]; + u8 mac_addr_temp[ETH_ALEN] = {0}; int ret = 0; if (h->ae_algo->ops->get_mac_addr) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 66e5807903a02a3838ee008e2849cb8180021f80..5ea9e59569effbb56ea5e198eb80cd782c1f7a28 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -61,6 +61,7 @@ static void hclge_sync_fd_table(struct hclge_dev *hdev); static void hclge_update_fec_stats(struct hclge_dev *hdev); static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret, int wait_cnt); +static int hclge_update_port_info(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; @@ -3041,6 +3042,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev) if (state != hdev->hw.mac.link) { hdev->hw.mac.link = state; + if (state == HCLGE_LINK_STATUS_UP) + hclge_update_port_info(hdev); + client->ops->link_status_change(handle, state); hclge_config_mac_tnl_int(hdev, state); if (rclient && rclient->ops->link_status_change) @@ -10025,8 +10029,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, struct hclge_vport_vlan_cfg *vlan, *tmp; struct hclge_dev *hdev = vport->back; - mutex_lock(&hdev->vport_lock); - list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (vlan->vlan_id == vlan_id) { if (is_write_tbl && vlan->hd_tbl_status) @@ -10041,8 +10043,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, break; } } - - mutex_unlock(&hdev->vport_lock); } void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) @@ -10451,11 +10451,16 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, * handle mailbox. Just record the vlan id, and remove it after * reset finished. */ + mutex_lock(&hdev->vport_lock); if ((test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) && is_kill) { set_bit(vlan_id, vport->vlan_del_fail_bmap); + mutex_unlock(&hdev->vport_lock); return -EBUSY; + } else if (!is_kill && test_bit(vlan_id, vport->vlan_del_fail_bmap)) { + clear_bit(vlan_id, vport->vlan_del_fail_bmap); } + mutex_unlock(&hdev->vport_lock); /* when port base vlan enabled, we use port base vlan as the vlan * filter entry. In this case, we don't update vlan filter table @@ -10470,17 +10475,22 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, } if (!ret) { - if (!is_kill) + if (!is_kill) { hclge_add_vport_vlan_table(vport, vlan_id, writen_to_tbl); - else if (is_kill && vlan_id != 0) + } else if (is_kill && vlan_id != 0) { + mutex_lock(&hdev->vport_lock); hclge_rm_vport_vlan_table(vport, vlan_id, false); + mutex_unlock(&hdev->vport_lock); + } } else if (is_kill) { /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence * with stack */ + mutex_lock(&hdev->vport_lock); set_bit(vlan_id, vport->vlan_del_fail_bmap); + mutex_unlock(&hdev->vport_lock); } hclge_set_vport_vlan_fltr_change(vport); @@ -10520,6 +10530,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev) int i, ret, sync_cnt = 0; u16 vlan_id; + mutex_lock(&hdev->vport_lock); /* start from vport 1 for PF is always alive */ for (i = 0; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; @@ -10530,21 +10541,26 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev) ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), vport->vport_id, vlan_id, true); - if (ret && ret != -EINVAL) + if (ret && ret != -EINVAL) { + mutex_unlock(&hdev->vport_lock); return; + } clear_bit(vlan_id, vport->vlan_del_fail_bmap); hclge_rm_vport_vlan_table(vport, vlan_id, false); hclge_set_vport_vlan_fltr_change(vport); sync_cnt++; - if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) + if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) { + mutex_unlock(&hdev->vport_lock); return; + } vlan_id = find_first_bit(vport->vlan_del_fail_bmap, VLAN_N_VID); } } + mutex_unlock(&hdev->vport_lock); hclge_sync_vlan_fltr_state(hdev); } @@ -11651,6 +11667,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_msi_irq_uninit; if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { + clear_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps); if (hnae3_dev_phy_imp_supported(hdev)) ret = hclge_update_tp_port_info(hdev); else diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index a4d68fb216fb92ae4a23b24d323dade62676abea..0aa9beefd1c7ee6c53d9f2623069bae00848e77f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1206,6 +1206,8 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle, test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) && is_kill) { set_bit(vlan_id, hdev->vlan_del_fail_bmap); return -EBUSY; + } else if (!is_kill && test_bit(vlan_id, hdev->vlan_del_fail_bmap)) { + clear_bit(vlan_id, hdev->vlan_del_fail_bmap); } hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, @@ -1233,20 +1235,25 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) int ret, sync_cnt = 0; u16 vlan_id; + if (bitmap_empty(hdev->vlan_del_fail_bmap, VLAN_N_VID)) + return; + + rtnl_lock(); vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); while (vlan_id != VLAN_N_VID) { ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q), vlan_id, true); if (ret) - return; + break; clear_bit(vlan_id, hdev->vlan_del_fail_bmap); sync_cnt++; if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT) - return; + break; vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); } + rtnl_unlock(); } static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) @@ -1974,8 +1981,18 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, return HCLGEVF_VECTOR0_EVENT_OTHER; } +static void hclgevf_reset_timer(struct timer_list *t) +{ + struct hclgevf_dev *hdev = from_timer(hdev, t, reset_timer); + + hclgevf_clear_event_cause(hdev, HCLGEVF_VECTOR0_EVENT_RST); + hclgevf_reset_task_schedule(hdev); +} + static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) { +#define HCLGEVF_RESET_DELAY 5 + enum hclgevf_evt_cause event_cause; struct hclgevf_dev *hdev = data; u32 clearval; @@ -1987,7 +2004,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) switch (event_cause) { case HCLGEVF_VECTOR0_EVENT_RST: - hclgevf_reset_task_schedule(hdev); + mod_timer(&hdev->reset_timer, + jiffies + msecs_to_jiffies(HCLGEVF_RESET_DELAY)); break; case HCLGEVF_VECTOR0_EVENT_MBX: hclgevf_mbx_handler(hdev); @@ -2930,6 +2948,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) HCLGEVF_DRIVER_NAME); hclgevf_task_schedule(hdev, round_jiffies_relative(HZ)); + timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 81c16b8c8da2961c1dcc800cdf164c1dd9ae2ba1..a73f2bf3a56a6426704c64a20e74403c715ac09f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -219,6 +219,7 @@ struct hclgevf_dev { enum hnae3_reset_type reset_level; unsigned long reset_pending; enum hnae3_reset_type reset_type; + struct timer_list reset_timer; #define HCLGEVF_RESET_REQUESTED 0 #define HCLGEVF_RESET_PENDING 1 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index bbf7b14079de3cf2dc68cdd67a1f288b38903a79..85c2a634c8f96a1d4d3356b0adf9c1f87f8ed9b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -63,6 +63,9 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1, i++; } + /* ensure additional_info will be seen after received_resp */ + smp_rmb(); + if (i >= HCLGEVF_MAX_TRY_TIMES) { dev_err(&hdev->pdev->dev, "VF could not get mbx(%u,%u) resp(=%d) from PF in %d tries\n", @@ -178,6 +181,10 @@ static void hclgevf_handle_mbx_response(struct hclgevf_dev *hdev, resp->resp_status = hclgevf_resp_to_errno(resp_status); memcpy(resp->additional_info, req->msg.resp_data, HCLGE_MBX_MAX_RESP_DATA_SIZE * sizeof(u8)); + + /* ensure additional_info will be seen before setting received_resp */ + smp_wmb(); + if (match_id) { /* If match_id is not zero, it means PF support match_id. * if the match_id is right, VF get the right response, or diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index cfb1580f5850b5496dfe4445fac14b35cacca42d..8b7504a9df316ce3be5786b61f603f0183dca5e6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -1479,14 +1479,14 @@ ice_post_dwnld_pkg_actions(struct ice_hw *hw) } /** - * ice_download_pkg + * ice_download_pkg_with_sig_seg * @hw: pointer to the hardware structure * @pkg_hdr: pointer to package header * * Handles the download of a complete package. */ static enum ice_ddp_state -ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) +ice_download_pkg_with_sig_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) { enum ice_aq_err aq_err = hw->adminq.sq_last_status; enum ice_ddp_state state = ICE_DDP_PKG_ERR; @@ -1519,6 +1519,103 @@ ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) state = ice_post_dwnld_pkg_actions(hw); ice_release_global_cfg_lock(hw); + + return state; +} + +/** + * ice_dwnld_cfg_bufs + * @hw: pointer to the hardware structure + * @bufs: pointer to an array of buffers + * @count: the number of buffers in the array + * + * Obtains global config lock and downloads the package configuration buffers + * to the firmware. + */ +static enum ice_ddp_state +ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count) +{ + enum ice_ddp_state state; + struct ice_buf_hdr *bh; + int status; + + if (!bufs || !count) + return ICE_DDP_PKG_ERR; + + /* If the first buffer's first section has its metadata bit set + * then there are no buffers to be downloaded, and the operation is + * considered a success. + */ + bh = (struct ice_buf_hdr *)bufs; + if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF) + return ICE_DDP_PKG_SUCCESS; + + status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE); + if (status) { + if (status == -EALREADY) + return ICE_DDP_PKG_ALREADY_LOADED; + return ice_map_aq_err_to_ddp_state(hw->adminq.sq_last_status); + } + + state = ice_dwnld_cfg_bufs_no_lock(hw, bufs, 0, count, true); + if (!state) + state = ice_post_dwnld_pkg_actions(hw); + + ice_release_global_cfg_lock(hw); + + return state; +} + +/** + * ice_download_pkg_without_sig_seg + * @hw: pointer to the hardware structure + * @ice_seg: pointer to the segment of the package to be downloaded + * + * Handles the download of a complete package without signature segment. + */ +static enum ice_ddp_state +ice_download_pkg_without_sig_seg(struct ice_hw *hw, struct ice_seg *ice_seg) +{ + struct ice_buf_table *ice_buf_tbl; + + ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n", + ice_seg->hdr.seg_format_ver.major, + ice_seg->hdr.seg_format_ver.minor, + ice_seg->hdr.seg_format_ver.update, + ice_seg->hdr.seg_format_ver.draft); + + ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n", + le32_to_cpu(ice_seg->hdr.seg_type), + le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id); + + ice_buf_tbl = ice_find_buf_table(ice_seg); + + ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n", + le32_to_cpu(ice_buf_tbl->buf_count)); + + return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array, + le32_to_cpu(ice_buf_tbl->buf_count)); +} + +/** + * ice_download_pkg + * @hw: pointer to the hardware structure + * @pkg_hdr: pointer to package header + * @ice_seg: pointer to the segment of the package to be downloaded + * + * Handles the download of a complete package. + */ +static enum ice_ddp_state +ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr, + struct ice_seg *ice_seg) +{ + enum ice_ddp_state state; + + if (hw->pkg_has_signing_seg) + state = ice_download_pkg_with_sig_seg(hw, pkg_hdr); + else + state = ice_download_pkg_without_sig_seg(hw, ice_seg); + ice_post_pkg_dwnld_vlan_mode_cfg(hw); return state; @@ -2083,7 +2180,7 @@ enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len) /* initialize package hints and then download package */ ice_init_pkg_hints(hw, seg); - state = ice_download_pkg(hw, pkg); + state = ice_download_pkg(hw, pkg, seg); if (state == ICE_DDP_PKG_ALREADY_LOADED) { ice_debug(hw, ICE_DBG_INIT, "package previously loaded - no work.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 835c419ccc7437ccc4c4ad8e4e4c4e2739a77295..86b180cb32a027d38f8422bb8da1fe3a2d1c54ac 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -815,12 +815,6 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; - if (prio > ICE_DPLL_PRIO_MAX) { - NL_SET_ERR_MSG_FMT(extack, "prio out of supported range 0-%d", - ICE_DPLL_PRIO_MAX); - return -EINVAL; - } - mutex_lock(&pf->dplls.lock); ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack); mutex_unlock(&pf->dplls.lock); @@ -1756,6 +1750,7 @@ ice_dpll_init_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu, } d->pf = pf; if (cgu) { + ice_dpll_update_state(pf, d, true); ret = dpll_device_register(d->dpll, type, &ice_dpll_ops, d); if (ret) { dpll_device_put(d->dpll); @@ -1796,8 +1791,6 @@ static int ice_dpll_init_worker(struct ice_pf *pf) struct ice_dplls *d = &pf->dplls; struct kthread_worker *kworker; - ice_dpll_update_state(pf, &d->eec, true); - ice_dpll_update_state(pf, &d->pps, true); kthread_init_delayed_work(&d->work, ice_dpll_periodic_work); kworker = kthread_create_worker(0, "ice-dplls-%s", dev_name(ice_pf_to_dev(pf))); @@ -1830,6 +1823,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, int num_pins, i, ret = -EINVAL; struct ice_hw *hw = &pf->hw; struct ice_dpll_pin *pins; + unsigned long caps; u8 freq_supp_num; bool input; @@ -1849,6 +1843,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, } for (i = 0; i < num_pins; i++) { + caps = 0; pins[i].idx = i; pins[i].prop.board_label = ice_cgu_get_pin_name(hw, i, input); pins[i].prop.type = ice_cgu_get_pin_type(hw, i, input); @@ -1861,8 +1856,8 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, &dp->input_prio[i]); if (ret) return ret; - pins[i].prop.capabilities |= - DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE; + caps |= (DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE | + DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE); pins[i].prop.phase_range.min = pf->dplls.input_phase_adj_max; pins[i].prop.phase_range.max = @@ -1872,9 +1867,11 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, pf->dplls.output_phase_adj_max; pins[i].prop.phase_range.max = -pf->dplls.output_phase_adj_max; + ret = ice_cgu_get_output_pin_state_caps(hw, i, &caps); + if (ret) + return ret; } - pins[i].prop.capabilities |= - DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; + pins[i].prop.capabilities = caps; ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL); if (ret) return ret; diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h index bb32b6d88373e21b7cf888f1527504fd3be958c4..93172e93995b949cc91a6497ee942fb1de0bcee0 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.h +++ b/drivers/net/ethernet/intel/ice/ice_dpll.h @@ -6,7 +6,6 @@ #include "ice.h" -#define ICE_DPLL_PRIO_MAX 0xF #define ICE_DPLL_RCLK_NUM_MAX 4 /** ice_dpll_pin - store info about pins diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 6d573908de7a0bf3690695e97ac191d8dec6f559..a00b55e14aac4e53806e22e9fcec377c5ffa9914 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -3961,3 +3961,57 @@ int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num) return ret; } + +/** + * ice_cgu_get_output_pin_state_caps - get output pin state capabilities + * @hw: pointer to the hw struct + * @pin_id: id of a pin + * @caps: capabilities to modify + * + * Return: + * * 0 - success, state capabilities were modified + * * negative - failure, capabilities were not modified + */ +int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id, + unsigned long *caps) +{ + bool can_change = true; + + switch (hw->device_id) { + case ICE_DEV_ID_E810C_SFP: + if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3) + can_change = false; + break; + case ICE_DEV_ID_E810C_QSFP: + if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3 || pin_id == ZL_OUT4) + can_change = false; + break; + case ICE_DEV_ID_E823L_10G_BASE_T: + case ICE_DEV_ID_E823L_1GBE: + case ICE_DEV_ID_E823L_BACKPLANE: + case ICE_DEV_ID_E823L_QSFP: + case ICE_DEV_ID_E823L_SFP: + case ICE_DEV_ID_E823C_10G_BASE_T: + case ICE_DEV_ID_E823C_BACKPLANE: + case ICE_DEV_ID_E823C_QSFP: + case ICE_DEV_ID_E823C_SFP: + case ICE_DEV_ID_E823C_SGMII: + if (hw->cgu_part_number == + ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032 && + pin_id == ZL_OUT2) + can_change = false; + else if (hw->cgu_part_number == + ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384 && + pin_id == SI_OUT1) + can_change = false; + break; + default: + return -EINVAL; + } + if (can_change) + *caps |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; + else + *caps &= ~DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 36aeeef99ec07e9b7bfdb63d175a1ad09188b71d..cf76701566c72479e7c8df5f2011c7ee1d146b6a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -282,6 +282,8 @@ int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx, int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num); void ice_ptp_init_phy_model(struct ice_hw *hw); +int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id, + unsigned long *caps); #define PFTSYN_SEM_BYTES 4 diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 90817136808d46039eb55839dff812533d1ae40e..29aac327574d64c281f30ecc8aba885506d83737 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4790,14 +4790,17 @@ static void mvneta_ethtool_get_strings(struct net_device *netdev, u32 sset, u8 *data) { if (sset == ETH_SS_STATS) { + struct mvneta_port *pp = netdev_priv(netdev); int i; for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) memcpy(data + i * ETH_GSTRING_LEN, mvneta_statistics[i].name, ETH_GSTRING_LEN); - data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics); - page_pool_ethtool_stats_get_strings(data); + if (!pp->bm_priv) { + data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics); + page_pool_ethtool_stats_get_strings(data); + } } } @@ -4915,8 +4918,10 @@ static void mvneta_ethtool_pp_stats(struct mvneta_port *pp, u64 *data) struct page_pool_stats stats = {}; int i; - for (i = 0; i < rxq_number; i++) - page_pool_get_stats(pp->rxqs[i].page_pool, &stats); + for (i = 0; i < rxq_number; i++) { + if (pp->rxqs[i].page_pool) + page_pool_get_stats(pp->rxqs[i].page_pool, &stats); + } page_pool_ethtool_stats_get(data, &stats); } @@ -4932,14 +4937,21 @@ static void mvneta_ethtool_get_stats(struct net_device *dev, for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) *data++ = pp->ethtool_stats[i]; - mvneta_ethtool_pp_stats(pp, data); + if (!pp->bm_priv) + mvneta_ethtool_pp_stats(pp, data); } static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset) { - if (sset == ETH_SS_STATS) - return ARRAY_SIZE(mvneta_statistics) + - page_pool_ethtool_stats_get_count(); + if (sset == ETH_SS_STATS) { + int count = ARRAY_SIZE(mvneta_statistics); + struct mvneta_port *pp = netdev_priv(dev); + + if (!pp->bm_priv) + count += page_pool_ethtool_stats_get_count(); + + return count; + } return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index bb11e644d24f7b2c7576d3402e2404315bead4af..af3928eddafd11fc65b312ab7147a3095ad9f1a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -177,6 +177,8 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, struct mlx5_cqe64 *cqe, + u8 *md_buff, + u8 *md_buff_sz, int budget) { struct mlx5e_ptp_port_ts_cqe_list *pending_cqe_list = ptpsq->ts_cqe_pending_list; @@ -211,19 +213,24 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp); out: napi_consume_skb(skb, budget); - mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, metadata_id); + md_buff[*md_buff_sz++] = metadata_id; if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) && !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work); } -static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) +static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq); - struct mlx5_cqwq *cqwq = &cq->wq; + int budget = min(napi_budget, MLX5E_TX_CQ_POLL_BUDGET); + u8 metadata_buff[MLX5E_TX_CQ_POLL_BUDGET]; + u8 metadata_buff_sz = 0; + struct mlx5_cqwq *cqwq; struct mlx5_cqe64 *cqe; int work_done = 0; + cqwq = &cq->wq; + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state))) return false; @@ -234,7 +241,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) do { mlx5_cqwq_pop(cqwq); - mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget); + mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, + metadata_buff, &metadata_buff_sz, napi_budget); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); mlx5_cqwq_update_db_record(cqwq); @@ -242,6 +250,10 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) /* ensure cq space is freed before enabling more cqes */ wmb(); + while (metadata_buff_sz > 0) + mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, + metadata_buff[--metadata_buff_sz]); + mlx5e_txqsq_wake(&ptpsq->txqsq); return work_done == budget; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index fea8c0a5fe893b9f8abc06e22e1aa57e1df16425..4358798d6ce14a1ff41245a8b9ea043cde249fa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -492,11 +492,11 @@ static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter, void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) { - char icosq_str[MLX5E_REPORTER_PER_Q_MAX_LEN] = {}; char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; struct mlx5e_icosq *icosq = rq->icosq; struct mlx5e_priv *priv = rq->priv; struct mlx5e_err_ctx err_ctx = {}; + char icosq_str[32] = {}; err_ctx.ctx = rq; err_ctx.recover = mlx5e_rx_reporter_timeout_recover; @@ -505,7 +505,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) if (icosq) snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn); snprintf(err_str, sizeof(err_str), - "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x", + "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x", rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn); mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 00a04fdd756f570f8a9db368eb1685f9d5df9a8e..668da5c70e63de76b3dabe667fceae0502721dff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -300,9 +300,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -322,6 +319,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, goto destroy_neigh_entry; } + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -404,16 +403,12 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, if (err) goto free_encap; - e->encap_size = ipv4_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto release_neigh; + goto free_encap; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -427,6 +422,10 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, goto free_encap; } + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -568,9 +567,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -590,6 +586,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto destroy_neigh_entry; } + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); @@ -671,16 +669,12 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, if (err) goto free_encap; - e->encap_size = ipv6_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto release_neigh; + goto free_encap; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -694,6 +688,10 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, goto free_encap; } + e->encap_size = ipv6_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 215261a6925507ec84babc66465fddc67f3e97c6..792a0ea544cd39997378ff43cf61776cb0c69bdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -43,12 +43,17 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) { struct mlx5_core_dev *mdev = priv->mdev; + int count; strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%04d (%.16s)", - fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), - mdev->board_id); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count == sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); + strscpy(drvinfo->bus_info, dev_name(mdev->device), sizeof(drvinfo->bus_info)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 693e55b010d9e85f55c24b38282fd63bbf1a8bd2..3ab682bbcf86780fa16daa27bc99baf9219bd88e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -71,13 +71,17 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + int count; strscpy(drvinfo->driver, mlx5e_rep_driver_name, sizeof(drvinfo->driver)); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%04d (%.16s)", - fw_rev_maj(mdev), fw_rev_min(mdev), - fw_rev_sub(mdev), mdev->board_id); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count == sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); } static const struct counter_desc sw_rep_stats_desc[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9a5a5c2c7da9e10fe123b7cc2911f6b775dae50a..7ca9e5b86778e3b0353530c4ce021a1fa8e5fd21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3147,7 +3147,7 @@ static struct mlx5_fields fields[] = { OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), - OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp), + OFFLOAD(IP_DSCP, 16, 0x0fc0, ip6, 0, ip_dscp), OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), @@ -3158,21 +3158,31 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; -static unsigned long mask_to_le(unsigned long mask, int size) +static u32 mask_field_get(void *mask, struct mlx5_fields *f) { - __be32 mask_be32; - __be16 mask_be16; - - if (size == 32) { - mask_be32 = (__force __be32)(mask); - mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); - } else if (size == 16) { - mask_be32 = (__force __be32)(mask); - mask_be16 = *(__be16 *)&mask_be32; - mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); + switch (f->field_bsize) { + case 32: + return be32_to_cpu(*(__be32 *)mask) & f->field_mask; + case 16: + return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask; + default: + return *(u8 *)mask & (u8)f->field_mask; } +} - return mask; +static void mask_field_clear(void *mask, struct mlx5_fields *f) +{ + switch (f->field_bsize) { + case 32: + *(__be32 *)mask &= ~cpu_to_be32(f->field_mask); + break; + case 16: + *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask); + break; + default: + *(u8 *)mask &= ~(u8)f->field_mask; + break; + } } static int offload_pedit_fields(struct mlx5e_priv *priv, @@ -3184,11 +3194,12 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; struct pedit_headers_action *hdrs = parse_attr->hdrs; void *headers_c, *headers_v, *action, *vals_p; - u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5e_tc_mod_hdr_acts *mod_acts; - unsigned long mask, field_mask; + void *s_masks_p, *a_masks_p; int i, first, last, next_z; struct mlx5_fields *f; + unsigned long mask; + u32 s_mask, a_mask; u8 cmd; mod_acts = &parse_attr->mod_hdr_acts; @@ -3204,15 +3215,11 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, bool skip; f = &fields[i]; - /* avoid seeing bits set from previous iterations */ - s_mask = 0; - a_mask = 0; - s_masks_p = (void *)set_masks + f->offset; a_masks_p = (void *)add_masks + f->offset; - s_mask = *s_masks_p & f->field_mask; - a_mask = *a_masks_p & f->field_mask; + s_mask = mask_field_get(s_masks_p, f); + a_mask = mask_field_get(a_masks_p, f); if (!s_mask && !a_mask) /* nothing to offload here */ continue; @@ -3239,22 +3246,20 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, match_mask, f->field_bsize)) skip = true; /* clear to denote we consumed this field */ - *s_masks_p &= ~f->field_mask; + mask_field_clear(s_masks_p, f); } else { cmd = MLX5_ACTION_TYPE_ADD; mask = a_mask; vals_p = (void *)add_vals + f->offset; /* add 0 is no change */ - if ((*(u32 *)vals_p & f->field_mask) == 0) + if (!mask_field_get(vals_p, f)) skip = true; /* clear to denote we consumed this field */ - *a_masks_p &= ~f->field_mask; + mask_field_clear(a_masks_p, f); } if (skip) continue; - mask = mask_to_le(mask, f->field_bsize); - first = find_first_bit(&mask, f->field_bsize); next_z = find_next_zero_bit(&mask, f->field_bsize, first); last = find_last_bit(&mask, f->field_bsize); @@ -3281,10 +3286,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, MLX5_SET(set_action_in, action, field, f->field); if (cmd == MLX5_ACTION_TYPE_SET) { + unsigned long field_mask = f->field_mask; int start; - field_mask = mask_to_le(f->field_mask, f->field_bsize); - /* if field is bit sized it can start not from first bit */ start = find_first_bit(&field_mask, f->field_bsize); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index d41435c22ce56f6c4fc2f9bed393b9785ff5b45b..f0b506e562df31d194490dd482ae6bf71b658706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -399,9 +399,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata); mlx5e_skb_cb_hwtstamp_init(skb); - mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, metadata_index); + mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); if (!netif_tx_queue_stopped(sq->txq) && mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { netif_tx_stop_queue(sq->txq); @@ -494,10 +494,10 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; - dev_kfree_skb_any(skb); if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist, be32_to_cpu(eseg->flow_table_metadata)); + dev_kfree_skb_any(skb); mlx5e_tx_flush(sq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ea0405e0a43facbae35f5d0ebe4bb01f144b2a56..40a6cb052a2da3f4c0e272202b3bd1466053a5ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -885,11 +885,14 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_irq *irq; + int cpu; irq = xa_load(&table->comp_irqs, vecidx); if (!irq) return; + cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); + cpumask_clear_cpu(cpu, &table->used_cpus); xa_erase(&table->comp_irqs, vecidx); mlx5_irq_affinity_irq_release(dev, irq); } @@ -897,16 +900,26 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; - irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx); - if (IS_ERR(irq)) { - /* In case SF irq pool does not exist, fallback to the PF irqs*/ - if (PTR_ERR(irq) == -ENOENT) - return comp_irq_request_pci(dev, vecidx); + /* In case SF irq pool does not exist, fallback to the PF irqs*/ + if (!mlx5_irq_pool_is_sf_pool(pool)) + return comp_irq_request_pci(dev, vecidx); + af_desc.is_managed = 1; + cpumask_copy(&af_desc.mask, cpu_online_mask); + cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus); + irq = mlx5_irq_affinity_request(pool, &af_desc); + if (IS_ERR(irq)) return PTR_ERR(irq); - } + + cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq)); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b296ac52a43974fc17e372a403a981a9220ab273..88236e75fd9013058dd855a77073ed21cb1e8afe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -984,7 +984,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) { + if (rep->vport == MLX5_VPORT_UPLINK && + on_esw == from_esw && on_esw->offloads.ft_ipsec_tx_pol) { dest.ft = on_esw->offloads.ft_ipsec_tx_pol; flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 047d5fed5f89e62cb58c4e9f8d2e094247430a16..612e666ec2635614f2e24443f319efd822b369b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -168,45 +168,3 @@ void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *i if (pool->irqs_per_cpu) cpu_put(pool, cpu); } - -/** - * mlx5_irq_affinity_irq_request_auto - request one IRQ for mlx5 device. - * @dev: mlx5 device that is requesting the IRQ. - * @used_cpus: cpumask of bounded cpus by the device - * @vecidx: vector index to request an IRQ for. - * - * Each IRQ is bounded to at most 1 CPU. - * This function is requesting an IRQ according to the default assignment. - * The default assignment policy is: - * - request the least loaded IRQ which is not bound to any - * CPU of the previous IRQs requested. - * - * On success, this function updates used_cpus mask and returns an irq pointer. - * In case of an error, an appropriate error pointer is returned. - */ -struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, - struct cpumask *used_cpus, u16 vecidx) -{ - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); - struct irq_affinity_desc af_desc = {}; - struct mlx5_irq *irq; - - if (!mlx5_irq_pool_is_sf_pool(pool)) - return ERR_PTR(-ENOENT); - - af_desc.is_managed = 1; - cpumask_copy(&af_desc.mask, cpu_online_mask); - cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus); - irq = mlx5_irq_affinity_request(pool, &af_desc); - - if (IS_ERR(irq)) - return irq; - - cpumask_or(used_cpus, used_cpus, mlx5_irq_get_affinity_mask(irq)); - mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", - pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), - cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), - mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); - - return irq; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index aa29f09e83564270b62c8abfe4e4f81e5e672408..0c83ef174275a7948eb42b2c4412c77178d8792c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -384,7 +384,12 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) { - return mlx5_ptp_adjtime(ptp, delta); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + return mlx5_ptp_adjtime_real_time(mdev, delta); } static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 653648216730acb975a9a69f1dd54a001bd21383..4dcf995cb1a2042c39938ee2f166a6c3d3e6ef24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -28,7 +28,7 @@ struct mlx5_irq { struct atomic_notifier_head nh; cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; + char name[MLX5_MAX_IRQ_FORMATTED_NAME]; struct mlx5_irq_pool *pool; int refcount; struct msi_map map; @@ -292,8 +292,8 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, else irq_sf_set_name(pool, name, i); ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); - snprintf(irq->name, MLX5_MAX_IRQ_NAME, - "%s@pci:%s", name, pci_name(dev->pdev)); + snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME, + MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev)); err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name, &irq->nh); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h index d3a77a0ab8488b7ddddf9ebbaac5c6ee46c638e5..c4d377f8df308917c89e0c8306cbd6c9ef3d8f83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -7,6 +7,9 @@ #include <linux/mlx5/driver.h> #define MLX5_MAX_IRQ_NAME (32) +#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s") +#define MLX5_MAX_IRQ_FORMATTED_NAME \ + (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR)) /* max irq_index is 2047, so four chars */ #define MLX5_MAX_IRQ_IDX_CHARS (4) #define MLX5_EQ_REFS_PER_IRQ (2) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 6ea88a5818047224973f7eb76e6beeebaf55b7f9..e3ec559369fa07ab7768454f7d31d0413678561b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -57,7 +57,8 @@ static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id) static bool mlx5dr_action_supp_fwd_fdb_multi_ft(struct mlx5_core_dev *dev) { - return (MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) || + return (MLX5_CAP_GEN(dev, steering_format_version) < MLX5_STEERING_FORMAT_CONNECTX_6DX || + MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) || MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 4e8527a724f504e177058cb404bea04e04322819..6fa06ba2d346532a6e0c7ffd24cd8fb127b9d219 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -52,7 +52,6 @@ struct dr_qp_init_attr { u32 cqn; u32 pdn; u32 max_send_wr; - u32 max_send_sge; struct mlx5_uars_page *uar; u8 isolate_vl_tc:1; }; @@ -247,37 +246,6 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) return err == CQ_POLL_ERR ? err : npolled; } -static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr) -{ - return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_flow_update_ctrl_seg) + - sizeof(struct mlx5_wqe_header_modify_argument_update_seg)); -} - -/* We calculate for specific RC QP with the required functionality */ -static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr) -{ - int update_arg_size; - int inl_size = 0; - int tot_size; - int size; - - update_arg_size = dr_qp_get_args_update_send_wqe_size(attr); - - size = sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg); - inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) + - DR_STE_SIZE, 16); - - size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg); - - size = max(size, update_arg_size); - - tot_size = max(size, inl_size); - - return ALIGN(tot_size, MLX5_SEND_WQE_BB); -} - static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, struct dr_qp_init_attr *attr) { @@ -285,7 +253,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; struct mlx5_wq_param wqp; struct mlx5dr_qp *dr_qp; - int wqe_size; int inlen; void *qpc; void *in; @@ -365,15 +332,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, if (err) goto err_in; dr_qp->uar = attr->uar; - wqe_size = dr_qp_calc_rc_send_wqe(attr); - dr_qp->max_inline_data = min(wqe_size - - (sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_inline_seg)), - (2 * MLX5_SEND_WQE_BB - - (sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_inline_seg)))); return dr_qp; @@ -437,48 +395,8 @@ dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, MLX5_SEND_WQE_DS; } -static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp, - struct dr_data_seg *data_seg, void *wqe) -{ - int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_inline_seg); - struct mlx5_wqe_inline_seg *seg; - int left_space; - int inl = 0; - void *addr; - int len; - int idx; - - seg = wqe; - wqe += sizeof(*seg); - addr = (void *)(unsigned long)(data_seg->addr); - len = data_seg->length; - inl += len; - left_space = MLX5_SEND_WQE_BB - inline_header_size; - - if (likely(len > left_space)) { - memcpy(wqe, addr, left_space); - len -= left_space; - addr += left_space; - idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1); - wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); - } - - memcpy(wqe, addr, len); - - if (likely(inl)) { - seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); - return DIV_ROUND_UP(inl + sizeof(seg->byte_count), - MLX5_SEND_WQE_DS); - } else { - return 0; - } -} - static void -dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp, - struct mlx5_wqe_ctrl_seg *wq_ctrl, +dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, u64 remote_addr, u32 rkey, struct dr_data_seg *data_seg, @@ -494,17 +412,15 @@ dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp, wq_raddr->reserved = 0; wq_dseg = (void *)(wq_raddr + 1); - /* WQE ctrl segment + WQE remote addr segment */ - *size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS; - if (data_seg->send_flags & IB_SEND_INLINE) { - *size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg); - } else { - wq_dseg->byte_count = cpu_to_be32(data_seg->length); - wq_dseg->lkey = cpu_to_be32(data_seg->lkey); - wq_dseg->addr = cpu_to_be64(data_seg->addr); - *size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS; /* WQE data segment */ - } + wq_dseg->byte_count = cpu_to_be32(data_seg->length); + wq_dseg->lkey = cpu_to_be32(data_seg->lkey); + wq_dseg->addr = cpu_to_be64(data_seg->addr); + + *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ + sizeof(*wq_dseg) + /* WQE data segment */ + sizeof(*wq_raddr)) / /* WQE remote addr segment */ + MLX5_SEND_WQE_DS; } static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl, @@ -535,7 +451,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, switch (opcode) { case MLX5_OPCODE_RDMA_READ: case MLX5_OPCODE_RDMA_WRITE: - dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr, + dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr, rkey, data_seg, &size); break; case MLX5_OPCODE_FLOW_TBL_ACCESS: @@ -656,7 +572,7 @@ static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring, if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; else - send_info->write.send_flags &= ~IB_SEND_SIGNALED; + send_info->write.send_flags = 0; } static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, @@ -680,13 +596,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, } send_ring->pending_wqe++; - if (!send_info->write.lkey) - send_info->write.send_flags |= IB_SEND_INLINE; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; - else - send_info->write.send_flags &= ~IB_SEND_SIGNALED; send_ring->pending_wqe++; send_info->read.length = send_info->write.length; @@ -696,9 +608,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, send_info->read.lkey = send_ring->sync_mr->mkey; if (send_ring->pending_wqe % send_ring->signal_th == 0) - send_info->read.send_flags |= IB_SEND_SIGNALED; + send_info->read.send_flags = IB_SEND_SIGNALED; else - send_info->read.send_flags &= ~IB_SEND_SIGNALED; + send_info->read.send_flags = 0; } static void dr_fill_data_segs(struct mlx5dr_domain *dmn, @@ -1345,7 +1257,6 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) dmn->send_ring->cq->qp = dmn->send_ring->qp; dmn->info.max_send_wr = QUEUE_SIZE; - init_attr.max_send_sge = 1; dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, DR_STE_SIZE); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 0c76c162b8a9f5eb720ea3ed00a5beaf4d930ad0..b9bb1d2f0237e986ea258ef2f441564969dd6c0e 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -624,6 +624,7 @@ struct rtl8169_private { unsigned supports_gmii:1; unsigned aspm_manageable:1; + unsigned dash_enabled:1; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; @@ -1253,14 +1254,26 @@ static bool r8168ep_check_dash(struct rtl8169_private *tp) return r8168ep_ocp_read(tp, 0x128) & BIT(0); } -static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp) +static bool rtl_dash_is_enabled(struct rtl8169_private *tp) +{ + switch (tp->dash_type) { + case RTL_DASH_DP: + return r8168dp_check_dash(tp); + case RTL_DASH_EP: + return r8168ep_check_dash(tp); + default: + return false; + } +} + +static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp) { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: - return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE; + return RTL_DASH_DP; case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53: - return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE; + return RTL_DASH_EP; default: return RTL_DASH_NONE; } @@ -1453,7 +1466,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) device_set_wakeup_enable(tp_to_dev(tp), wolopts); - if (tp->dash_type == RTL_DASH_NONE) { + if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, !wolopts); tp->dev->wol_enabled = wolopts ? 1 : 0; } @@ -2512,7 +2525,7 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp) static void rtl_prepare_power_down(struct rtl8169_private *tp) { - if (tp->dash_type != RTL_DASH_NONE) + if (tp->dash_enabled) return; if (tp->mac_version == RTL_GIGA_MAC_VER_32 || @@ -4648,10 +4661,16 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl8169_cleanup(tp); rtl_disable_exit_l1(tp); rtl_prepare_power_down(tp); + + if (tp->dash_type != RTL_DASH_NONE) + rtl8168_driver_stop(tp); } static void rtl8169_up(struct rtl8169_private *tp) { + if (tp->dash_type != RTL_DASH_NONE) + rtl8168_driver_start(tp); + pci_set_master(tp->pci_dev); phy_init_hw(tp->phydev); phy_resume(tp->phydev); @@ -4869,7 +4888,7 @@ static int rtl8169_runtime_idle(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); - if (tp->dash_type != RTL_DASH_NONE) + if (tp->dash_enabled) return -EBUSY; if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev)) @@ -4895,8 +4914,7 @@ static void rtl_shutdown(struct pci_dev *pdev) /* Restore original MAC address */ rtl_rar_set(tp, tp->dev->perm_addr); - if (system_state == SYSTEM_POWER_OFF && - tp->dash_type == RTL_DASH_NONE) { + if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) { pci_wake_from_d3(pdev, tp->saved_wolopts); pci_set_power_state(pdev, PCI_D3hot); } @@ -5254,7 +5272,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1); tp->aspm_manageable = !rc; - tp->dash_type = rtl_check_dash(tp); + tp->dash_type = rtl_get_dash_type(tp); + tp->dash_enabled = rtl_dash_is_enabled(tp); tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK; @@ -5325,7 +5344,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* configure chip for default features */ rtl8169_set_features(dev, dev->features); - if (tp->dash_type == RTL_DASH_NONE) { + if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, true); } else { rtl_set_d3_pll_down(tp, false); @@ -5365,7 +5384,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) "ok" : "ko"); if (tp->dash_type != RTL_DASH_NONE) { - netdev_info(dev, "DASH enabled\n"); + netdev_info(dev, "DASH %s\n", + tp->dash_enabled ? "enabled" : "disabled"); rtl8168_driver_start(tp); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3e50fd53a617440c3f417fd0ceea7209bca9dc1b..2afb2bd25977a2265d998fb2203bbe3a70a9d3f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5293,6 +5293,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dma_dir = page_pool_get_dma_dir(rx_q->page_pool); buf_sz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; + limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit); if (netif_msg_rx_status(priv)) { void *rx_head; @@ -5328,10 +5329,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) len = 0; } +read_again: if (count >= limit) break; -read_again: buf1_len = 0; buf2_len = 0; entry = next_entry; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 6c4b64227ac886dde27b4b0e5343bd730099d00a..411898a4f38caa73b5e88e31b554601ee079ae6d 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -2063,7 +2063,7 @@ static int prueth_probe(struct platform_device *pdev) &prueth->shram); if (ret) { dev_err(dev, "unable to get PRUSS SHRD RAM2: %d\n", ret); - pruss_put(prueth->pruss); + goto put_pruss; } prueth->sram_pool = of_gen_pool_get(np, "sram", 0); @@ -2105,10 +2105,7 @@ static int prueth_probe(struct platform_device *pdev) prueth->iep1 = icss_iep_get_idx(np, 1); if (IS_ERR(prueth->iep1)) { ret = dev_err_probe(dev, PTR_ERR(prueth->iep1), "iep1 get failed\n"); - icss_iep_put(prueth->iep0); - prueth->iep0 = NULL; - prueth->iep1 = NULL; - goto free_pool; + goto put_iep0; } if (prueth->pdata.quirk_10m_link_issue) { @@ -2205,6 +2202,12 @@ static int prueth_probe(struct platform_device *pdev) exit_iep: if (prueth->pdata.quirk_10m_link_issue) icss_iep_exit_fw(prueth->iep1); + icss_iep_put(prueth->iep1); + +put_iep0: + icss_iep_put(prueth->iep0); + prueth->iep0 = NULL; + prueth->iep1 = NULL; free_pool: gen_pool_free(prueth->sram_pool, @@ -2212,6 +2215,8 @@ static int prueth_probe(struct platform_device *pdev) put_mem: pruss_release_mem_region(prueth->pruss, &prueth->shram); + +put_pruss: pruss_put(prueth->pruss); put_cores: diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 21e9cac7312186380fa60de11f0a9178080b74b0..2d5b021b4ea6053eeb055a76fa4c7d9380cd2a53 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -411,7 +411,7 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, return addr; } -static int ipvlan_process_v4_outbound(struct sk_buff *skb) +static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) { const struct iphdr *ip4h = ip_hdr(skb); struct net_device *dev = skb->dev; @@ -453,13 +453,11 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb) } #if IS_ENABLED(CONFIG_IPV6) -static int ipvlan_process_v6_outbound(struct sk_buff *skb) + +static noinline_for_stack int +ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct net_device *dev = skb->dev; - struct net *net = dev_net(dev); - struct dst_entry *dst; - int err, ret = NET_XMIT_DROP; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, .daddr = ip6h->daddr, @@ -469,27 +467,38 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) .flowi6_mark = skb->mark, .flowi6_proto = ip6h->nexthdr, }; + struct dst_entry *dst; + int err; - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - ret = dst->error; + dst = ip6_route_output(dev_net(dev), NULL, &fl6); + err = dst->error; + if (err) { dst_release(dst); - goto err; + return err; } skb_dst_set(skb, dst); + return 0; +} + +static int ipvlan_process_v6_outbound(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + int err, ret = NET_XMIT_DROP; + + err = ipvlan_route_v6_outbound(dev, skb); + if (unlikely(err)) { + DEV_STATS_INC(dev, tx_errors); + kfree_skb(skb); + return err; + } memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - err = ip6_local_out(net, skb->sk, skb); + err = ip6_local_out(dev_net(dev), skb->sk, skb); if (unlikely(net_xmit_eval(err))) DEV_STATS_INC(dev, tx_errors); else ret = NET_XMIT_SUCCESS; - goto out; -err: - DEV_STATS_INC(dev, tx_errors); - kfree_skb(skb); -out: return ret; } #else diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 02bd201bc7e58e566af962986e2c3bf00deba50e..c8da94af4161a5fc7e61aae5e11646bc0a5013fd 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -780,7 +780,7 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change) if (dev->flags & IFF_UP) { if (change & IFF_ALLMULTI) dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) + if (!macvlan_passthru(vlan->port) && change & IFF_PROMISC) dev_set_promiscuity(lowerdev, dev->flags & IFF_PROMISC ? 1 : -1); diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index ebcdffdf4f0e0193635d2b479e8a9f7a32703509..52d05ce4a2819815963eebf4df399058835ff350 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -453,6 +453,10 @@ ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) case PPPIOCSMRU: if (get_user(val, (int __user *) argp)) break; + if (val > U16_MAX) { + err = -EINVAL; + break; + } if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; @@ -687,7 +691,7 @@ ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count) /* strip address/control field if present */ p = skb->data; - if (p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) { + if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) { /* chop off address/control */ if (skb->len < 3) goto err; diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index 539d8920c2029b2146308b45cf7650ad992f72a9..bb0d92461b08b3796df3f2b4078987291bb2f3cf 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -176,7 +176,7 @@ static struct notifier_block parisc_panic_block = { static int qemu_power_off(struct sys_off_data *data) { /* this turns the system off via SeaBIOS */ - *(int *)data->cb_data = 0; + gsc_writel(0, (unsigned long) data->cb_data); pdc_soft_power_button(1); return NOTIFY_DONE; } diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 3f7a7478880240a2d256caf624b61dcc8e7054af..7513018c9f9ac72d5c1b0055b55ae9ff36e710b0 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -572,7 +572,8 @@ ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags, for (i = 0; i < cnt; i++) { event[i] = queue->buf[queue->head]; - queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + /* Paired with READ_ONCE() in queue_cnt() */ + WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS); } spin_unlock_irqrestore(&queue->lock, flags); diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 3134568af622d396f6ab15049cd1a3ace3243269..15b804ba48685ee11a34b88df1ae738a136d17a1 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -57,10 +57,11 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue, dst->t.sec = seconds; dst->t.nsec = remainder; + /* Both WRITE_ONCE() are paired with READ_ONCE() in queue_cnt() */ if (!queue_free(queue)) - queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS); - queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS; + WRITE_ONCE(queue->tail, (queue->tail + 1) % PTP_MAX_TIMESTAMPS); spin_unlock_irqrestore(&queue->lock, flags); } diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 35fde0a0574606a04d6bdf0ab42a204da5fa6532..45f9002a5dcaea2c588c001fa83317fc318500ee 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -85,9 +85,13 @@ struct ptp_vclock { * that a writer might concurrently increment the tail does not * matter, since the queue remains nonempty nonetheless. */ -static inline int queue_cnt(struct timestamp_event_queue *q) +static inline int queue_cnt(const struct timestamp_event_queue *q) { - int cnt = q->tail - q->head; + /* + * Paired with WRITE_ONCE() in enqueue_external_timestamp(), + * ptp_read(), extts_fifo_show(). + */ + int cnt = READ_ONCE(q->tail) - READ_ONCE(q->head); return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt; } diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 7d023d9d0acbfb3d128be09578753588fa59e84d..f7a499a1bd39ec22edf6c77407a48736e137f277 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -94,7 +94,8 @@ static ssize_t extts_fifo_show(struct device *dev, qcnt = queue_cnt(queue); if (qcnt) { event = queue->buf[queue->head]; - queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + /* Paired with READ_ONCE() in queue_cnt() */ + WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS); } spin_unlock_irqrestore(&queue->lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 32d1e73e46eecdcfdb4b6d30eef826202e5a0c3a..03348f605c2e9a5289082fbd2c1694c90229cf16 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1837,8 +1837,16 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res, } spin_lock_irqsave(qp->qp_lock_ptr, *flags); - if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd))) - sp->done(sp, res); + switch (sp->type) { + case SRB_SCSI_CMD: + if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd))) + sp->done(sp, res); + break; + default: + if (ret_cmd) + sp->done(sp, res); + break; + } } else { sp->done(sp, res); } diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 67922e2c4c1915cb74dd62b7a7704d371efdb1d9..6d8218a4412264952226a3a2a628e5a6e7d8d2bc 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1019,7 +1019,7 @@ static ssize_t sdebug_error_write(struct file *file, const char __user *ubuf, struct sdebug_err_inject *inject; struct scsi_device *sdev = (struct scsi_device *)file->f_inode->i_private; - buf = kmalloc(count, GFP_KERNEL); + buf = kzalloc(count + 1, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1132,7 +1132,6 @@ static const struct file_operations sdebug_target_reset_fail_fops = { static int sdebug_target_alloc(struct scsi_target *starget) { struct sdebug_target_info *targetip; - struct dentry *dentry; targetip = kzalloc(sizeof(struct sdebug_target_info), GFP_KERNEL); if (!targetip) @@ -1140,15 +1139,9 @@ static int sdebug_target_alloc(struct scsi_target *starget) targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev), sdebug_debugfs_root); - if (IS_ERR_OR_NULL(targetip->debugfs_entry)) - pr_info("%s: failed to create debugfs directory for target %s\n", - __func__, dev_name(&starget->dev)); debugfs_create_file("fail_reset", 0600, targetip->debugfs_entry, starget, &sdebug_target_reset_fail_fops); - if (IS_ERR_OR_NULL(dentry)) - pr_info("%s: failed to create fail_reset file for target %s\n", - __func__, dev_name(&starget->dev)); starget->hostdata = targetip; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 530918cbfce2d1840feec8dcca2dcc7479257452..fa00dd503cbf618b066eac2001d60a3dc59fd1ed 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1643,24 +1643,21 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0; } -static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) +static int sd_sync_cache(struct scsi_disk *sdkp) { int retries, res; struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; - struct scsi_sense_hdr my_sshdr; + struct scsi_sense_hdr sshdr; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, - /* caller might not be interested in sense, but we need it */ - .sshdr = sshdr ? : &my_sshdr, + .sshdr = &sshdr, }; if (!scsi_device_online(sdp)) return -ENODEV; - sshdr = exec_args.sshdr; - for (retries = 3; retries > 0; --retries) { unsigned char cmd[16] = { 0 }; @@ -1685,15 +1682,23 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) return res; if (scsi_status_is_check_condition(res) && - scsi_sense_valid(sshdr)) { - sd_print_sense_hdr(sdkp, sshdr); + scsi_sense_valid(&sshdr)) { + sd_print_sense_hdr(sdkp, &sshdr); /* we need to evaluate the error return */ - if (sshdr->asc == 0x3a || /* medium not present */ - sshdr->asc == 0x20 || /* invalid command */ - (sshdr->asc == 0x74 && sshdr->ascq == 0x71)) /* drive is password locked */ + if (sshdr.asc == 0x3a || /* medium not present */ + sshdr.asc == 0x20 || /* invalid command */ + (sshdr.asc == 0x74 && sshdr.ascq == 0x71)) /* drive is password locked */ /* this is no error here */ return 0; + /* + * This drive doesn't support sync and there's not much + * we can do because this is called during shutdown + * or suspend so just return success so those operations + * can proceed. + */ + if (sshdr.sense_key == ILLEGAL_REQUEST) + return 0; } switch (host_byte(res)) { @@ -3853,7 +3858,7 @@ static void sd_shutdown(struct device *dev) if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); - sd_sync_cache(sdkp, NULL); + sd_sync_cache(sdkp); } if ((system_state != SYSTEM_RESTART && @@ -3874,7 +3879,6 @@ static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime) static int sd_suspend_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - struct scsi_sense_hdr sshdr; int ret = 0; if (!sdkp) /* E.g.: runtime suspend following sd_remove() */ @@ -3883,24 +3887,13 @@ static int sd_suspend_common(struct device *dev, bool runtime) if (sdkp->WCE && sdkp->media_present) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); - ret = sd_sync_cache(sdkp, &sshdr); - - if (ret) { - /* ignore OFFLINE device */ - if (ret == -ENODEV) - return 0; - - if (!scsi_sense_valid(&sshdr) || - sshdr.sense_key != ILLEGAL_REQUEST) - return ret; + ret = sd_sync_cache(sdkp); + /* ignore OFFLINE device */ + if (ret == -ENODEV) + return 0; - /* - * sshdr.sense_key == ILLEGAL_REQUEST means this drive - * doesn't support sync. There's not much to do and - * suspend shouldn't fail. - */ - ret = 0; - } + if (ret) + return ret; } if (sd_do_start_stop(sdkp->device, runtime)) { diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 2ba8ec254dceeec5bacf2b027bb1b06ed9f54c56..0787456c2b892f773bba5cf66c09ac7918787852 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -436,7 +436,7 @@ int ufshcd_mcq_init(struct ufs_hba *hba) for (i = 0; i < hba->nr_hw_queues; i++) { hwq = &hba->uhq[i]; - hwq->max_entries = hba->nutrs; + hwq->max_entries = hba->nutrs + 1; spin_lock_init(&hwq->sq_lock); spin_lock_init(&hwq->cq_lock); mutex_init(&hwq->sq_mutex); @@ -630,6 +630,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) int tag = scsi_cmd_to_rq(cmd)->tag; struct ufshcd_lrb *lrbp = &hba->lrb[tag]; struct ufs_hw_queue *hwq; + unsigned long flags; int err = FAILED; if (!ufshcd_cmd_inflight(lrbp->cmd)) { @@ -670,8 +671,10 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) } err = SUCCESS; + spin_lock_irqsave(&hwq->cq_lock, flags); if (ufshcd_cmd_inflight(lrbp->cmd)) ufshcd_release_scsi_cmd(hba, lrbp); + spin_unlock_irqrestore(&hwq->cq_lock, flags); out: return err; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 45dcfaadaf98eb6f6216975764722228d204ea22..4dffcfefd62da22360ba1cc8b467c3946a425030 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -203,8 +203,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5829E_ESIM 0x81e4 #define DELL_PRODUCT_5829E 0x81e6 -#define DELL_PRODUCT_FM101R 0x8213 -#define DELL_PRODUCT_FM101R_ESIM 0x8215 +#define DELL_PRODUCT_FM101R_ESIM 0x8213 +#define DELL_PRODUCT_FM101R 0x8215 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da @@ -609,6 +609,8 @@ static void option_instat_callback(struct urb *urb); #define UNISOC_VENDOR_ID 0x1782 /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */ #define TOZED_PRODUCT_LT70C 0x4055 +/* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ +#define LUAT_PRODUCT_AIR720U 0x4e00 /* Device flags */ @@ -1546,7 +1548,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0165, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0167, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff), + .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0191, 0xff, 0xff, 0xff), /* ZTE EuFi890 */ .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0196, 0xff, 0xff, 0xff) }, @@ -2249,6 +2252,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) }, /* Fibocom L610 (ECM/RNDIS mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0001, 0xff, 0xff, 0xff) }, /* Fibocom L716-EU (ECM/RNDIS mode) */ { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ @@ -2271,6 +2275,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index b3a3cb16579552ccec1cd4ec1875ad36376b6cd3..b137f367934393268e57c8e559f6ba1194fab20d 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -437,7 +437,7 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, if (blk->shared_backend) { blk->buffer = shared_buffer; } else { - blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, + blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, GFP_KERNEL); if (!blk->buffer) { ret = -ENOMEM; @@ -495,7 +495,7 @@ static int __init vdpasim_blk_init(void) goto parent_err; if (shared_backend) { - shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, + shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, GFP_KERNEL); if (!shared_buffer) { ret = -ENOMEM; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 30df5c58db73a846dc3c58ad821201694869a348..da7ec77cdaff075b5e66a21be7693cd6b7d30047 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1582,7 +1582,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) err: put_device(&v->dev); - ida_simple_remove(&vhost_vdpa_ida, v->minor); return r; } diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index c2524a7207cfaeed4149748c7890b3ce0f7e6ccd..7a5593997e0efe64f2ce832bf0c8676bbaabf188 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -242,7 +242,7 @@ void vp_del_vqs(struct virtio_device *vdev) if (v != VIRTIO_MSI_NO_VECTOR) { int irq = pci_irq_vector(vp_dev->pci_dev, v); - irq_set_affinity_hint(irq, NULL); + irq_update_affinity_hint(irq, NULL); free_irq(irq, vq); } } @@ -443,10 +443,10 @@ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) mask = vp_dev->msix_affinity_masks[info->msix_vector]; irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); if (!cpu_mask) - irq_set_affinity_hint(irq, NULL); + irq_update_affinity_hint(irq, NULL); else { cpumask_copy(mask, cpu_mask); - irq_set_affinity_hint(irq, mask); + irq_set_affinity_and_hint(irq, mask); } } return 0; diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index e2a1fe7bb66cc9c4da102f1559da648173c3376f..7de8b1ebabac4217b2240f6d8faaf486b4265f38 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -294,9 +294,10 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) err = -EINVAL; mdev->common = vp_modern_map_capability(mdev, common, - sizeof(struct virtio_pci_common_cfg), 4, - 0, sizeof(struct virtio_pci_modern_common_cfg), - &mdev->common_len, NULL); + sizeof(struct virtio_pci_common_cfg), 4, 0, + offsetofend(struct virtio_pci_modern_common_cfg, + queue_reset), + &mdev->common_len, NULL); if (!mdev->common) goto err_map_common; mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1, diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index b8f2f971c2f0fc6ef6ef3b3e8ab88147b4dc39c3..e3585330cf98b1c7e99a4800913c32cb380fd48f 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -171,11 +171,11 @@ static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl) int i; struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + evtchn_port_t evtchn; /* Timer interrupt has highest priority. */ - irq = irq_from_virq(cpu, VIRQ_TIMER); + irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn); if (irq != -1) { - evtchn_port_t evtchn = evtchn_from_irq(irq); word_idx = evtchn / BITS_PER_LONG; bit_idx = evtchn % BITS_PER_LONG; if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) @@ -328,9 +328,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) { if (sync_test_bit(i, BM(sh->evtchn_pending))) { int word_idx = i / BITS_PER_EVTCHN_WORD; - printk(" %d: event %d -> irq %d%s%s%s\n", + printk(" %d: event %d -> irq %u%s%s%s\n", cpu_from_evtchn(i), i, - get_evtchn_to_irq(i), + irq_from_evtchn(i), sync_test_bit(word_idx, BM(&v->evtchn_pending_sel)) ? "" : " l2-clear", !sync_test_bit(i, BM(sh->evtchn_mask)) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6de6b084ea60d5f4cadd56e27a2fc02f8294df06..f5edb9e27e3ca1e33edd88fce970e57ca228430c 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -164,6 +164,8 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping */ static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; +/* Cache for IPI event channels - needed for hot cpu unplug (avoid RCU usage). */ +static DEFINE_PER_CPU(evtchn_port_t [XEN_NR_IPIS], ipi_to_evtchn) = {[0 ... XEN_NR_IPIS-1] = 0}; /* Event channel distribution data */ static atomic_t channels_on_cpu[NR_CPUS]; @@ -172,7 +174,7 @@ static int **evtchn_to_irq; #ifdef CONFIG_X86 static unsigned long *pirq_eoi_map; #endif -static bool (*pirq_needs_eoi)(unsigned irq); +static bool (*pirq_needs_eoi)(struct irq_info *info); #define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq))) #define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq))) @@ -188,7 +190,6 @@ static struct irq_chip xen_lateeoi_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); -static void disable_dynirq(struct irq_data *data); static DEFINE_PER_CPU(unsigned int, irq_epoch); @@ -246,15 +247,6 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) return 0; } -int get_evtchn_to_irq(evtchn_port_t evtchn) -{ - if (evtchn >= xen_evtchn_max_channels()) - return -1; - if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) - return -1; - return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); -} - /* Get info for IRQ */ static struct irq_info *info_for_irq(unsigned irq) { @@ -272,6 +264,19 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info) irq_set_chip_data(irq, info); } +static struct irq_info *evtchn_to_info(evtchn_port_t evtchn) +{ + int irq; + + if (evtchn >= xen_evtchn_max_channels()) + return NULL; + if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) + return NULL; + irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); + + return (irq < 0) ? NULL : info_for_irq(irq); +} + /* Per CPU channel accounting */ static void channels_on_cpu_dec(struct irq_info *info) { @@ -298,6 +303,13 @@ static void channels_on_cpu_inc(struct irq_info *info) info->is_accounted = 1; } +static void xen_irq_free_desc(unsigned int irq) +{ + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq >= nr_legacy_irqs()) + irq_free_desc(irq); +} + static void delayed_free_irq(struct work_struct *work) { struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, @@ -309,14 +321,11 @@ static void delayed_free_irq(struct work_struct *work) kfree(info); - /* Legacy IRQ descriptors are managed by the arch. */ - if (irq >= nr_legacy_irqs()) - irq_free_desc(irq); + xen_irq_free_desc(irq); } /* Constructors for packed IRQ information. */ static int xen_irq_info_common_setup(struct irq_info *info, - unsigned irq, enum xen_irq_type type, evtchn_port_t evtchn, unsigned short cpu) @@ -326,29 +335,27 @@ static int xen_irq_info_common_setup(struct irq_info *info, BUG_ON(info->type != IRQT_UNBOUND && info->type != type); info->type = type; - info->irq = irq; info->evtchn = evtchn; info->cpu = cpu; info->mask_reason = EVT_MASK_REASON_EXPLICIT; raw_spin_lock_init(&info->lock); - ret = set_evtchn_to_irq(evtchn, irq); + ret = set_evtchn_to_irq(evtchn, info->irq); if (ret < 0) return ret; - irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); + irq_clear_status_flags(info->irq, IRQ_NOREQUEST | IRQ_NOAUTOEN); return xen_evtchn_port_setup(evtchn); } -static int xen_irq_info_evtchn_setup(unsigned irq, +static int xen_irq_info_evtchn_setup(struct irq_info *info, evtchn_port_t evtchn, struct xenbus_device *dev) { - struct irq_info *info = info_for_irq(irq); int ret; - ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0); + ret = xen_irq_info_common_setup(info, IRQT_EVTCHN, evtchn, 0); info->u.interdomain = dev; if (dev) atomic_inc(&dev->event_channels); @@ -356,49 +363,37 @@ static int xen_irq_info_evtchn_setup(unsigned irq, return ret; } -static int xen_irq_info_ipi_setup(unsigned cpu, - unsigned irq, - evtchn_port_t evtchn, - enum ipi_vector ipi) +static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu, + evtchn_port_t evtchn, enum ipi_vector ipi) { - struct irq_info *info = info_for_irq(irq); - info->u.ipi = ipi; - per_cpu(ipi_to_irq, cpu)[ipi] = irq; + per_cpu(ipi_to_irq, cpu)[ipi] = info->irq; + per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; - return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0); + return xen_irq_info_common_setup(info, IRQT_IPI, evtchn, 0); } -static int xen_irq_info_virq_setup(unsigned cpu, - unsigned irq, - evtchn_port_t evtchn, - unsigned virq) +static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu, + evtchn_port_t evtchn, unsigned int virq) { - struct irq_info *info = info_for_irq(irq); - info->u.virq = virq; - per_cpu(virq_to_irq, cpu)[virq] = irq; + per_cpu(virq_to_irq, cpu)[virq] = info->irq; - return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0); + return xen_irq_info_common_setup(info, IRQT_VIRQ, evtchn, 0); } -static int xen_irq_info_pirq_setup(unsigned irq, - evtchn_port_t evtchn, - unsigned pirq, - unsigned gsi, - uint16_t domid, - unsigned char flags) +static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn, + unsigned int pirq, unsigned int gsi, + uint16_t domid, unsigned char flags) { - struct irq_info *info = info_for_irq(irq); - info->u.pirq.pirq = pirq; info->u.pirq.gsi = gsi; info->u.pirq.domid = domid; info->u.pirq.flags = flags; - return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0); + return xen_irq_info_common_setup(info, IRQT_PIRQ, evtchn, 0); } static void xen_irq_info_cleanup(struct irq_info *info) @@ -412,7 +407,7 @@ static void xen_irq_info_cleanup(struct irq_info *info) /* * Accessors for packed IRQ information. */ -evtchn_port_t evtchn_from_irq(unsigned irq) +static evtchn_port_t evtchn_from_irq(unsigned int irq) { const struct irq_info *info = NULL; @@ -426,64 +421,51 @@ evtchn_port_t evtchn_from_irq(unsigned irq) unsigned int irq_from_evtchn(evtchn_port_t evtchn) { - return get_evtchn_to_irq(evtchn); + struct irq_info *info = evtchn_to_info(evtchn); + + return info ? info->irq : -1; } EXPORT_SYMBOL_GPL(irq_from_evtchn); -int irq_from_virq(unsigned int cpu, unsigned int virq) +int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq, + evtchn_port_t *evtchn) { - return per_cpu(virq_to_irq, cpu)[virq]; + int irq = per_cpu(virq_to_irq, cpu)[virq]; + + *evtchn = evtchn_from_irq(irq); + + return irq; } -static enum ipi_vector ipi_from_irq(unsigned irq) +static enum ipi_vector ipi_from_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - BUG_ON(info == NULL); BUG_ON(info->type != IRQT_IPI); return info->u.ipi; } -static unsigned virq_from_irq(unsigned irq) +static unsigned int virq_from_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - BUG_ON(info == NULL); BUG_ON(info->type != IRQT_VIRQ); return info->u.virq; } -static unsigned pirq_from_irq(unsigned irq) +static unsigned int pirq_from_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - BUG_ON(info == NULL); BUG_ON(info->type != IRQT_PIRQ); return info->u.pirq.pirq; } -static enum xen_irq_type type_from_irq(unsigned irq) -{ - return info_for_irq(irq)->type; -} - -static unsigned cpu_from_irq(unsigned irq) -{ - return info_for_irq(irq)->cpu; -} - unsigned int cpu_from_evtchn(evtchn_port_t evtchn) { - int irq = get_evtchn_to_irq(evtchn); - unsigned ret = 0; - - if (irq != -1) - ret = cpu_from_irq(irq); + struct irq_info *info = evtchn_to_info(evtchn); - return ret; + return info ? info->cpu : 0; } static void do_mask(struct irq_info *info, u8 reason) @@ -515,36 +497,30 @@ static void do_unmask(struct irq_info *info, u8 reason) } #ifdef CONFIG_X86 -static bool pirq_check_eoi_map(unsigned irq) +static bool pirq_check_eoi_map(struct irq_info *info) { - return test_bit(pirq_from_irq(irq), pirq_eoi_map); + return test_bit(pirq_from_irq(info), pirq_eoi_map); } #endif -static bool pirq_needs_eoi_flag(unsigned irq) +static bool pirq_needs_eoi_flag(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); BUG_ON(info->type != IRQT_PIRQ); return info->u.pirq.flags & PIRQ_NEEDS_EOI; } -static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, +static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu, bool force_affinity) { - int irq = get_evtchn_to_irq(evtchn); - struct irq_info *info = info_for_irq(irq); - - BUG_ON(irq == -1); - if (IS_ENABLED(CONFIG_SMP) && force_affinity) { - struct irq_data *data = irq_get_irq_data(irq); + struct irq_data *data = irq_get_irq_data(info->irq); irq_data_update_affinity(data, cpumask_of(cpu)); irq_data_update_effective_affinity(data, cpumask_of(cpu)); } - xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu); + xen_evtchn_port_bind_to_cpu(info->evtchn, cpu, info->cpu); channels_on_cpu_dec(info); info->cpu = cpu; @@ -601,7 +577,9 @@ static void lateeoi_list_add(struct irq_info *info) spin_lock_irqsave(&eoi->eoi_list_lock, flags); - if (list_empty(&eoi->eoi_list)) { + elem = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, + eoi_list); + if (!elem || info->eoi_time < elem->eoi_time) { list_add(&info->eoi_list, &eoi->eoi_list); mod_delayed_work_on(info->eoi_cpu, system_wq, &eoi->delayed, delay); @@ -732,50 +710,49 @@ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) } EXPORT_SYMBOL_GPL(xen_irq_lateeoi); -static void xen_irq_init(unsigned irq) +static struct irq_info *xen_irq_init(unsigned int irq) { struct irq_info *info; info = kzalloc(sizeof(*info), GFP_KERNEL); - if (info == NULL) - panic("Unable to allocate metadata for IRQ%d\n", irq); + if (info) { + info->irq = irq; + info->type = IRQT_UNBOUND; + info->refcnt = -1; + INIT_RCU_WORK(&info->rwork, delayed_free_irq); - info->type = IRQT_UNBOUND; - info->refcnt = -1; - INIT_RCU_WORK(&info->rwork, delayed_free_irq); + set_info_for_irq(irq, info); + /* + * Interrupt affinity setting can be immediate. No point + * in delaying it until an interrupt is handled. + */ + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - set_info_for_irq(irq, info); - /* - * Interrupt affinity setting can be immediate. No point - * in delaying it until an interrupt is handled. - */ - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + INIT_LIST_HEAD(&info->eoi_list); + list_add_tail(&info->list, &xen_irq_list_head); + } - INIT_LIST_HEAD(&info->eoi_list); - list_add_tail(&info->list, &xen_irq_list_head); + return info; } -static int __must_check xen_allocate_irqs_dynamic(int nvec) +static struct irq_info *xen_allocate_irq_dynamic(void) { - int i, irq = irq_alloc_descs(-1, 0, nvec, -1); + int irq = irq_alloc_desc_from(0, -1); + struct irq_info *info = NULL; if (irq >= 0) { - for (i = 0; i < nvec; i++) - xen_irq_init(irq + i); + info = xen_irq_init(irq); + if (!info) + xen_irq_free_desc(irq); } - return irq; -} - -static inline int __must_check xen_allocate_irq_dynamic(void) -{ - - return xen_allocate_irqs_dynamic(1); + return info; } -static int __must_check xen_allocate_irq_gsi(unsigned gsi) +static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi) { int irq; + struct irq_info *info; /* * A PV guest has no concept of a GSI (since it has no ACPI @@ -792,15 +769,15 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) else irq = irq_alloc_desc_at(gsi, -1); - xen_irq_init(irq); + info = xen_irq_init(irq); + if (!info) + xen_irq_free_desc(irq); - return irq; + return info; } -static void xen_free_irq(unsigned irq) +static void xen_free_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - if (WARN_ON(!info)) return; @@ -821,14 +798,11 @@ static void event_handler_exit(struct irq_info *info) clear_evtchn(info->evtchn); } -static void pirq_query_unmask(int irq) +static void pirq_query_unmask(struct irq_info *info) { struct physdev_irq_status_query irq_status; - struct irq_info *info = info_for_irq(irq); - - BUG_ON(info->type != IRQT_PIRQ); - irq_status.irq = pirq_from_irq(irq); + irq_status.irq = pirq_from_irq(info); if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) irq_status.flags = 0; @@ -837,61 +811,81 @@ static void pirq_query_unmask(int irq) info->u.pirq.flags |= PIRQ_NEEDS_EOI; } -static void eoi_pirq(struct irq_data *data) +static void do_eoi_pirq(struct irq_info *info) { - struct irq_info *info = info_for_irq(data->irq); - evtchn_port_t evtchn = info ? info->evtchn : 0; - struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; + struct physdev_eoi eoi = { .irq = pirq_from_irq(info) }; int rc = 0; - if (!VALID_EVTCHN(evtchn)) + if (!VALID_EVTCHN(info->evtchn)) return; event_handler_exit(info); - if (pirq_needs_eoi(data->irq)) { + if (pirq_needs_eoi(info)) { rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); WARN_ON(rc); } } +static void eoi_pirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + + do_eoi_pirq(info); +} + +static void do_disable_dynirq(struct irq_info *info) +{ + if (VALID_EVTCHN(info->evtchn)) + do_mask(info, EVT_MASK_REASON_EXPLICIT); +} + +static void disable_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + + if (info) + do_disable_dynirq(info); +} + static void mask_ack_pirq(struct irq_data *data) { - disable_dynirq(data); - eoi_pirq(data); + struct irq_info *info = info_for_irq(data->irq); + + if (info) { + do_disable_dynirq(info); + do_eoi_pirq(info); + } } -static unsigned int __startup_pirq(unsigned int irq) +static unsigned int __startup_pirq(struct irq_info *info) { struct evtchn_bind_pirq bind_pirq; - struct irq_info *info = info_for_irq(irq); - evtchn_port_t evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = info->evtchn; int rc; - BUG_ON(info->type != IRQT_PIRQ); - if (VALID_EVTCHN(evtchn)) goto out; - bind_pirq.pirq = pirq_from_irq(irq); + bind_pirq.pirq = pirq_from_irq(info); /* NB. We are happy to share unless we are probing. */ bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? BIND_PIRQ__WILL_SHARE : 0; rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); if (rc != 0) { - pr_warn("Failed to obtain physical IRQ %d\n", irq); + pr_warn("Failed to obtain physical IRQ %d\n", info->irq); return 0; } evtchn = bind_pirq.port; - pirq_query_unmask(irq); + pirq_query_unmask(info); - rc = set_evtchn_to_irq(evtchn, irq); + rc = set_evtchn_to_irq(evtchn, info->irq); if (rc) goto err; info->evtchn = evtchn; - bind_evtchn_to_cpu(evtchn, 0, false); + bind_evtchn_to_cpu(info, 0, false); rc = xen_evtchn_port_setup(evtchn); if (rc) @@ -900,26 +894,28 @@ static unsigned int __startup_pirq(unsigned int irq) out: do_unmask(info, EVT_MASK_REASON_EXPLICIT); - eoi_pirq(irq_get_irq_data(irq)); + do_eoi_pirq(info); return 0; err: - pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc); + pr_err("irq%d: Failed to set port to irq mapping (%d)\n", info->irq, + rc); xen_evtchn_close(evtchn); return 0; } static unsigned int startup_pirq(struct irq_data *data) { - return __startup_pirq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + + return __startup_pirq(info); } static void shutdown_pirq(struct irq_data *data) { - unsigned int irq = data->irq; - struct irq_info *info = info_for_irq(irq); - evtchn_port_t evtchn = evtchn_from_irq(irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info->evtchn; BUG_ON(info->type != IRQT_PIRQ); @@ -957,10 +953,14 @@ int xen_irq_from_gsi(unsigned gsi) } EXPORT_SYMBOL_GPL(xen_irq_from_gsi); -static void __unbind_from_irq(unsigned int irq) +static void __unbind_from_irq(struct irq_info *info, unsigned int irq) { - evtchn_port_t evtchn = evtchn_from_irq(irq); - struct irq_info *info = info_for_irq(irq); + evtchn_port_t evtchn; + + if (!info) { + xen_irq_free_desc(irq); + return; + } if (info->refcnt > 0) { info->refcnt--; @@ -968,19 +968,22 @@ static void __unbind_from_irq(unsigned int irq) return; } + evtchn = info->evtchn; + if (VALID_EVTCHN(evtchn)) { - unsigned int cpu = cpu_from_irq(irq); + unsigned int cpu = info->cpu; struct xenbus_device *dev; if (!info->is_static) xen_evtchn_close(evtchn); - switch (type_from_irq(irq)) { + switch (info->type) { case IRQT_VIRQ: - per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; + per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1; break; case IRQT_IPI: - per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; + per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1; + per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0; break; case IRQT_EVTCHN: dev = info->u.interdomain; @@ -994,7 +997,7 @@ static void __unbind_from_irq(unsigned int irq) xen_irq_info_cleanup(info); } - xen_free_irq(irq); + xen_free_irq(info); } /* @@ -1010,24 +1013,24 @@ static void __unbind_from_irq(unsigned int irq) int xen_bind_pirq_gsi_to_irq(unsigned gsi, unsigned pirq, int shareable, char *name) { - int irq; + struct irq_info *info; struct physdev_irq irq_op; int ret; mutex_lock(&irq_mapping_update_lock); - irq = xen_irq_from_gsi(gsi); - if (irq != -1) { + ret = xen_irq_from_gsi(gsi); + if (ret != -1) { pr_info("%s: returning irq %d for gsi %u\n", - __func__, irq, gsi); + __func__, ret, gsi); goto out; } - irq = xen_allocate_irq_gsi(gsi); - if (irq < 0) + info = xen_allocate_irq_gsi(gsi); + if (!info) goto out; - irq_op.irq = irq; + irq_op.irq = info->irq; irq_op.vector = 0; /* Only the privileged domain can do this. For non-priv, the pcifront @@ -1035,20 +1038,19 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, * this in the priv domain. */ if (xen_initial_domain() && HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { - xen_free_irq(irq); - irq = -ENOSPC; + xen_free_irq(info); + ret = -ENOSPC; goto out; } - ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF, + ret = xen_irq_info_pirq_setup(info, 0, pirq, gsi, DOMID_SELF, shareable ? PIRQ_SHAREABLE : 0); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } - pirq_query_unmask(irq); + pirq_query_unmask(info); /* We try to use the handler with the appropriate semantic for the * type of interrupt: if the interrupt is an edge triggered * interrupt we use handle_edge_irq. @@ -1065,16 +1067,18 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, * is the right choice either way. */ if (shareable) - irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip, handle_fasteoi_irq, name); else - irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip, handle_edge_irq, name); + ret = info->irq; + out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } #ifdef CONFIG_PCI_MSI @@ -1096,17 +1100,22 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, int pirq, int nvec, const char *name, domid_t domid) { int i, irq, ret; + struct irq_info *info; mutex_lock(&irq_mapping_update_lock); - irq = xen_allocate_irqs_dynamic(nvec); + irq = irq_alloc_descs(-1, 0, nvec, -1); if (irq < 0) goto out; for (i = 0; i < nvec; i++) { + info = xen_irq_init(irq + i); + if (!info) + goto error_irq; + irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name); - ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid, + ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid, i == 0 ? 0 : PIRQ_MSI_GROUP); if (ret < 0) goto error_irq; @@ -1118,9 +1127,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, out: mutex_unlock(&irq_mapping_update_lock); return irq; + error_irq: - while (nvec--) - __unbind_from_irq(irq + nvec); + while (nvec--) { + info = info_for_irq(irq + nvec); + __unbind_from_irq(info, irq + nvec); + } mutex_unlock(&irq_mapping_update_lock); return ret; } @@ -1156,67 +1168,45 @@ int xen_destroy_irq(int irq) } } - xen_free_irq(irq); + xen_free_irq(info); out: mutex_unlock(&irq_mapping_update_lock); return rc; } -int xen_irq_from_pirq(unsigned pirq) -{ - int irq; - - struct irq_info *info; - - mutex_lock(&irq_mapping_update_lock); - - list_for_each_entry(info, &xen_irq_list_head, list) { - if (info->type != IRQT_PIRQ) - continue; - irq = info->irq; - if (info->u.pirq.pirq == pirq) - goto out; - } - irq = -1; -out: - mutex_unlock(&irq_mapping_update_lock); - - return irq; -} - - int xen_pirq_from_irq(unsigned irq) { - return pirq_from_irq(irq); + struct irq_info *info = info_for_irq(irq); + + return pirq_from_irq(info); } EXPORT_SYMBOL_GPL(xen_pirq_from_irq); static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, struct xenbus_device *dev) { - int irq; - int ret; + int ret = -ENOMEM; + struct irq_info *info; if (evtchn >= xen_evtchn_max_channels()) return -ENOMEM; mutex_lock(&irq_mapping_update_lock); - irq = get_evtchn_to_irq(evtchn); + info = evtchn_to_info(evtchn); - if (irq == -1) { - irq = xen_allocate_irq_dynamic(); - if (irq < 0) + if (!info) { + info = xen_allocate_irq_dynamic(); + if (!info) goto out; - irq_set_chip_and_handler_name(irq, chip, + irq_set_chip_and_handler_name(info->irq, chip, handle_edge_irq, "event"); - ret = xen_irq_info_evtchn_setup(irq, evtchn, dev); + ret = xen_irq_info_evtchn_setup(info, evtchn, dev); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } /* @@ -1226,17 +1216,17 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, * affinity setting is not invoked on them so nothing would * bind the channel. */ - bind_evtchn_to_cpu(evtchn, 0, false); - } else { - struct irq_info *info = info_for_irq(irq); - if (!WARN_ON(!info || info->type != IRQT_EVTCHN)) - info->refcnt++; + bind_evtchn_to_cpu(info, 0, false); + } else if (!WARN_ON(info->type != IRQT_EVTCHN)) { + info->refcnt++; } + ret = info->irq; + out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } int bind_evtchn_to_irq(evtchn_port_t evtchn) @@ -1255,18 +1245,19 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; evtchn_port_t evtchn; - int ret, irq; + struct irq_info *info; + int ret; mutex_lock(&irq_mapping_update_lock); - irq = per_cpu(ipi_to_irq, cpu)[ipi]; + ret = per_cpu(ipi_to_irq, cpu)[ipi]; - if (irq == -1) { - irq = xen_allocate_irq_dynamic(); - if (irq < 0) + if (ret == -1) { + info = xen_allocate_irq_dynamic(); + if (!info) goto out; - irq_set_chip_and_handler_name(irq, &xen_percpu_chip, + irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip, handle_percpu_irq, "ipi"); bind_ipi.vcpu = xen_vcpu_nr(cpu); @@ -1275,25 +1266,25 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) BUG(); evtchn = bind_ipi.port; - ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); + ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } /* * Force the affinity mask to the target CPU so proc shows * the correct target. */ - bind_evtchn_to_cpu(evtchn, cpu, true); + bind_evtchn_to_cpu(info, cpu, true); + ret = info->irq; } else { - struct irq_info *info = info_for_irq(irq); + info = info_for_irq(ret); WARN_ON(info == NULL || info->type != IRQT_IPI); } out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev, @@ -1361,22 +1352,23 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) { struct evtchn_bind_virq bind_virq; evtchn_port_t evtchn = 0; - int irq, ret; + struct irq_info *info; + int ret; mutex_lock(&irq_mapping_update_lock); - irq = per_cpu(virq_to_irq, cpu)[virq]; + ret = per_cpu(virq_to_irq, cpu)[virq]; - if (irq == -1) { - irq = xen_allocate_irq_dynamic(); - if (irq < 0) + if (ret == -1) { + info = xen_allocate_irq_dynamic(); + if (!info) goto out; if (percpu) - irq_set_chip_and_handler_name(irq, &xen_percpu_chip, + irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip, handle_percpu_irq, "virq"); else - irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + irq_set_chip_and_handler_name(info->irq, &xen_dynamic_chip, handle_edge_irq, "virq"); bind_virq.virq = virq; @@ -1391,10 +1383,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) BUG_ON(ret < 0); } - ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq); + ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } @@ -1402,22 +1393,26 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) * Force the affinity mask for percpu interrupts so proc * shows the correct target. */ - bind_evtchn_to_cpu(evtchn, cpu, percpu); + bind_evtchn_to_cpu(info, cpu, percpu); + ret = info->irq; } else { - struct irq_info *info = info_for_irq(irq); + info = info_for_irq(ret); WARN_ON(info == NULL || info->type != IRQT_VIRQ); } out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } static void unbind_from_irq(unsigned int irq) { + struct irq_info *info; + mutex_lock(&irq_mapping_update_lock); - __unbind_from_irq(irq); + info = info_for_irq(irq); + __unbind_from_irq(info, irq); mutex_unlock(&irq_mapping_update_lock); } @@ -1568,13 +1563,7 @@ EXPORT_SYMBOL_GPL(xen_set_irq_priority); int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static) { - int irq = get_evtchn_to_irq(evtchn); - struct irq_info *info; - - if (irq == -1) - return -ENOENT; - - info = info_for_irq(irq); + struct irq_info *info = evtchn_to_info(evtchn); if (!info) return -ENOENT; @@ -1590,7 +1579,6 @@ EXPORT_SYMBOL_GPL(evtchn_make_refcounted); int evtchn_get(evtchn_port_t evtchn) { - int irq; struct irq_info *info; int err = -ENOENT; @@ -1599,11 +1587,7 @@ int evtchn_get(evtchn_port_t evtchn) mutex_lock(&irq_mapping_update_lock); - irq = get_evtchn_to_irq(evtchn); - if (irq == -1) - goto done; - - info = info_for_irq(irq); + info = evtchn_to_info(evtchn); if (!info) goto done; @@ -1623,16 +1607,17 @@ EXPORT_SYMBOL_GPL(evtchn_get); void evtchn_put(evtchn_port_t evtchn) { - int irq = get_evtchn_to_irq(evtchn); - if (WARN_ON(irq == -1)) + struct irq_info *info = evtchn_to_info(evtchn); + + if (WARN_ON(!info)) return; - unbind_from_irq(irq); + unbind_from_irq(info->irq); } EXPORT_SYMBOL_GPL(evtchn_put); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) { - int irq; + evtchn_port_t evtchn; #ifdef CONFIG_X86 if (unlikely(vector == XEN_NMI_VECTOR)) { @@ -1643,9 +1628,9 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) return; } #endif - irq = per_cpu(ipi_to_irq, cpu)[vector]; - BUG_ON(irq < 0); - notify_remote_via_irq(irq); + evtchn = per_cpu(ipi_to_evtchn, cpu)[vector]; + BUG_ON(evtchn == 0); + notify_remote_via_evtchn(evtchn); } struct evtchn_loop_ctrl { @@ -1656,12 +1641,10 @@ struct evtchn_loop_ctrl { void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) { - int irq; - struct irq_info *info; + struct irq_info *info = evtchn_to_info(port); struct xenbus_device *dev; - irq = get_evtchn_to_irq(port); - if (irq == -1) + if (!info) return; /* @@ -1686,7 +1669,6 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) } } - info = info_for_irq(irq); if (xchg_acquire(&info->is_active, 1)) return; @@ -1700,7 +1682,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) info->eoi_time = get_jiffies_64() + event_eoi_delay; } - generic_handle_irq(irq); + generic_handle_irq(info->irq); } int xen_evtchn_do_upcall(void) @@ -1758,16 +1740,17 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) mutex_lock(&irq_mapping_update_lock); /* After resume the irq<->evtchn mappings are all cleared out */ - BUG_ON(get_evtchn_to_irq(evtchn) != -1); + BUG_ON(evtchn_to_info(evtchn)); /* Expect irq to have been bound before, so there should be a proper type */ BUG_ON(info->type == IRQT_UNBOUND); - (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL); + info->irq = irq; + (void)xen_irq_info_evtchn_setup(info, evtchn, NULL); mutex_unlock(&irq_mapping_update_lock); - bind_evtchn_to_cpu(evtchn, info->cpu, false); + bind_evtchn_to_cpu(info, info->cpu, false); /* Unmask the event channel. */ enable_irq(irq); @@ -1801,7 +1784,7 @@ static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) * it, but don't do the xenlinux-level rebind in that case. */ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu, false); + bind_evtchn_to_cpu(info, tcpu, false); do_unmask(info, EVT_MASK_REASON_TEMPORARY); @@ -1858,28 +1841,30 @@ static void enable_dynirq(struct irq_data *data) do_unmask(info, EVT_MASK_REASON_EXPLICIT); } -static void disable_dynirq(struct irq_data *data) +static void do_ack_dynirq(struct irq_info *info) { - struct irq_info *info = info_for_irq(data->irq); - evtchn_port_t evtchn = info ? info->evtchn : 0; + evtchn_port_t evtchn = info->evtchn; if (VALID_EVTCHN(evtchn)) - do_mask(info, EVT_MASK_REASON_EXPLICIT); + event_handler_exit(info); } static void ack_dynirq(struct irq_data *data) { struct irq_info *info = info_for_irq(data->irq); - evtchn_port_t evtchn = info ? info->evtchn : 0; - if (VALID_EVTCHN(evtchn)) - event_handler_exit(info); + if (info) + do_ack_dynirq(info); } static void mask_ack_dynirq(struct irq_data *data) { - disable_dynirq(data); - ack_dynirq(data); + struct irq_info *info = info_for_irq(data->irq); + + if (info) { + do_disable_dynirq(info); + do_ack_dynirq(info); + } } static void lateeoi_ack_dynirq(struct irq_data *data) @@ -1952,13 +1937,13 @@ static void restore_pirqs(void) if (rc) { pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n", gsi, irq, pirq, rc); - xen_free_irq(irq); + xen_free_irq(info); continue; } printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); - __startup_pirq(irq); + __startup_pirq(info); } } @@ -1966,13 +1951,15 @@ static void restore_cpu_virqs(unsigned int cpu) { struct evtchn_bind_virq bind_virq; evtchn_port_t evtchn; + struct irq_info *info; int virq, irq; for (virq = 0; virq < NR_VIRQS; virq++) { if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) continue; + info = info_for_irq(irq); - BUG_ON(virq_from_irq(irq) != virq); + BUG_ON(virq_from_irq(info) != virq); /* Get a new binding from Xen. */ bind_virq.virq = virq; @@ -1983,9 +1970,9 @@ static void restore_cpu_virqs(unsigned int cpu) evtchn = bind_virq.port; /* Record the new mapping. */ - (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq); + xen_irq_info_virq_setup(info, cpu, evtchn, virq); /* The affinity mask is still valid */ - bind_evtchn_to_cpu(evtchn, cpu, false); + bind_evtchn_to_cpu(info, cpu, false); } } @@ -1993,13 +1980,15 @@ static void restore_cpu_ipis(unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; evtchn_port_t evtchn; + struct irq_info *info; int ipi, irq; for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) continue; + info = info_for_irq(irq); - BUG_ON(ipi_from_irq(irq) != ipi); + BUG_ON(ipi_from_irq(info) != ipi); /* Get a new binding from Xen. */ bind_ipi.vcpu = xen_vcpu_nr(cpu); @@ -2009,9 +1998,9 @@ static void restore_cpu_ipis(unsigned int cpu) evtchn = bind_ipi.port; /* Record the new mapping. */ - (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); + xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); /* The affinity mask is still valid */ - bind_evtchn_to_cpu(evtchn, cpu, false); + bind_evtchn_to_cpu(info, cpu, false); } } @@ -2025,13 +2014,6 @@ void xen_clear_irq_pending(int irq) event_handler_exit(info); } EXPORT_SYMBOL(xen_clear_irq_pending); -void xen_set_irq_pending(int irq) -{ - evtchn_port_t evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - set_evtchn(evtchn); -} bool xen_test_irq_pending(int irq) { diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 4d3398eff9cdf1567a5f64dcb63561680dda405e..19ae31695edcf1a2e5cc93863c515a61c43496d9 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -33,7 +33,6 @@ struct evtchn_ops { extern const struct evtchn_ops *evtchn_ops; -int get_evtchn_to_irq(evtchn_port_t evtchn); void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl); unsigned int cpu_from_evtchn(evtchn_port_t evtchn); diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index b3e3d1bb37f3e388d5ed27b6fb855cb419b3d54e..50865527314538a8bedbde0f2590fbbb4afce3ce 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -47,6 +47,9 @@ #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#ifdef CONFIG_ACPI +#include <acpi/processor.h> +#endif /* * @cpu_id: Xen physical cpu logic number @@ -400,4 +403,23 @@ bool __init xen_processor_present(uint32_t acpi_id) return online; } + +void xen_sanitize_proc_cap_bits(uint32_t *cap) +{ + struct xen_platform_op op = { + .cmd = XENPF_set_processor_pminfo, + .u.set_pminfo.id = -1, + .u.set_pminfo.type = XEN_PM_PDC, + }; + u32 buf[3] = { ACPI_PDC_REVISION_ID, 1, *cap }; + int ret; + + set_xen_guest_handle(op.u.set_pminfo.pdc, buf); + ret = HYPERVISOR_platform_op(&op); + if (ret) + pr_err("sanitize of _PDC buffer bits from Xen failed: %d\n", + ret); + else + *cap = buf[2]; +} #endif diff --git a/drivers/xen/xen-front-pgdir-shbuf.c b/drivers/xen/xen-front-pgdir-shbuf.c index b52e0fa595a992a9d5c0a321b9fed2b725ee3de3..223870a0111b27b8f8d6c17034780bfecf8c3e90 100644 --- a/drivers/xen/xen-front-pgdir-shbuf.c +++ b/drivers/xen/xen-front-pgdir-shbuf.c @@ -21,7 +21,7 @@ #include <xen/xen-front-pgdir-shbuf.h> -/** +/* * This structure represents the structure of a shared page * that contains grant references to the pages of the shared * buffer. This structure is common to many Xen para-virtualized @@ -33,7 +33,7 @@ struct xen_page_directory { grant_ref_t gref[]; /* Variable length */ }; -/** +/* * Shared buffer ops which are differently implemented * depending on the allocation mode, e.g. if the buffer * is allocated by the corresponding backend or frontend. @@ -61,7 +61,7 @@ struct xen_front_pgdir_shbuf_ops { int (*unmap)(struct xen_front_pgdir_shbuf *buf); }; -/** +/* * Get granted reference to the very first page of the * page directory. Usually this is passed to the backend, * so it can find/fill the grant references to the buffer's @@ -81,7 +81,7 @@ xen_front_pgdir_shbuf_get_dir_start(struct xen_front_pgdir_shbuf *buf) } EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_get_dir_start); -/** +/* * Map granted references of the shared buffer. * * Depending on the shared buffer mode of allocation @@ -102,7 +102,7 @@ int xen_front_pgdir_shbuf_map(struct xen_front_pgdir_shbuf *buf) } EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_map); -/** +/* * Unmap granted references of the shared buffer. * * Depending on the shared buffer mode of allocation @@ -123,7 +123,7 @@ int xen_front_pgdir_shbuf_unmap(struct xen_front_pgdir_shbuf *buf) } EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_unmap); -/** +/* * Free all the resources of the shared buffer. * * \param buf shared buffer which resources to be freed. @@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_free); offsetof(struct xen_page_directory, \ gref)) / sizeof(grant_ref_t)) -/** +/* * Get the number of pages the page directory consumes itself. * * \param buf shared buffer. @@ -160,7 +160,7 @@ static int get_num_pages_dir(struct xen_front_pgdir_shbuf *buf) return DIV_ROUND_UP(buf->num_pages, XEN_NUM_GREFS_PER_PAGE); } -/** +/* * Calculate the number of grant references needed to share the buffer * and its pages when backend allocates the buffer. * @@ -172,7 +172,7 @@ static void backend_calc_num_grefs(struct xen_front_pgdir_shbuf *buf) buf->num_grefs = get_num_pages_dir(buf); } -/** +/* * Calculate the number of grant references needed to share the buffer * and its pages when frontend allocates the buffer. * @@ -190,7 +190,7 @@ static void guest_calc_num_grefs(struct xen_front_pgdir_shbuf *buf) #define xen_page_to_vaddr(page) \ ((uintptr_t)pfn_to_kaddr(page_to_xen_pfn(page))) -/** +/* * Unmap the buffer previously mapped with grant references * provided by the backend. * @@ -238,7 +238,7 @@ static int backend_unmap(struct xen_front_pgdir_shbuf *buf) return ret; } -/** +/* * Map the buffer with grant references provided by the backend. * * \param buf shared buffer. @@ -320,7 +320,7 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf) return ret; } -/** +/* * Fill page directory with grant references to the pages of the * page directory itself. * @@ -350,7 +350,7 @@ static void backend_fill_page_dir(struct xen_front_pgdir_shbuf *buf) page_dir->gref_dir_next_page = XEN_GREF_LIST_END; } -/** +/* * Fill page directory with grant references to the pages of the * page directory and the buffer we share with the backend. * @@ -389,7 +389,7 @@ static void guest_fill_page_dir(struct xen_front_pgdir_shbuf *buf) } } -/** +/* * Grant references to the frontend's buffer pages. * * These will be shared with the backend, so it can @@ -418,7 +418,7 @@ static int guest_grant_refs_for_buffer(struct xen_front_pgdir_shbuf *buf, return 0; } -/** +/* * Grant all the references needed to share the buffer. * * Grant references to the page directory pages and, if @@ -466,7 +466,7 @@ static int grant_references(struct xen_front_pgdir_shbuf *buf) return 0; } -/** +/* * Allocate all required structures to mange shared buffer. * * \param buf shared buffer. @@ -506,7 +506,7 @@ static const struct xen_front_pgdir_shbuf_ops local_ops = { .grant_refs_for_buffer = guest_grant_refs_for_buffer, }; -/** +/* * Allocate a new instance of a shared buffer. * * \param cfg configuration to be used while allocating a new shared buffer. diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index ef02c9bb0354173eddc4b86d6c998965ba747678..23c0834a97a4acaf490d13d7de32b00daf0bb399 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -313,17 +313,17 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, bp.level - 1, 0); b = bch2_btree_iter_peek_node(iter); - if (IS_ERR(b)) + if (IS_ERR_OR_NULL(b)) goto err; BUG_ON(b->c.level != bp.level - 1); - if (b && extent_matches_bp(c, bp.btree_id, bp.level, - bkey_i_to_s_c(&b->key), - bucket, bp)) + if (extent_matches_bp(c, bp.btree_id, bp.level, + bkey_i_to_s_c(&b->key), + bucket, bp)) return b; - if (b && btree_node_will_make_reachable(b)) { + if (btree_node_will_make_reachable(b)) { b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); } else { backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 9cb8684959ee17affdc2a558c14414477fef922d..403aa3389fccfd2a60ceb3edf53d81877a9144df 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -617,7 +617,7 @@ struct journal_seq_blacklist_table { u64 start; u64 end; bool dirty; - } entries[0]; + } entries[]; }; struct journal_keys { diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index c2adf3fbb0b3abec5a3521d49663ec77ed32916f..6fa90bcd70168258384b1912f8eb7ed2aaf6e8c7 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3087,8 +3087,6 @@ void bch2_trans_put(struct btree_trans *trans) srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); } - bch2_journal_preres_put(&c->journal, &trans->journal_preres); - kfree(trans->extra_journal_entries.data); if (trans->fs_usage_deltas) { diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 9b78f78a75b59c0cb28dac46fc4e107f0a9cbca1..37fbf22de8fcba305d717f41e4ae8a9461502d53 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -89,10 +89,13 @@ static void bkey_cached_free(struct btree_key_cache *bc, ck->btree_trans_barrier_seq = start_poll_synchronize_srcu(&c->btree_trans_barrier); - if (ck->c.lock.readers) + if (ck->c.lock.readers) { list_move_tail(&ck->list, &bc->freed_pcpu); - else + bc->nr_freed_pcpu++; + } else { list_move_tail(&ck->list, &bc->freed_nonpcpu); + bc->nr_freed_nonpcpu++; + } atomic_long_inc(&bc->nr_freed); kfree(ck->k); @@ -109,6 +112,8 @@ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc, { struct bkey_cached *pos; + bc->nr_freed_nonpcpu++; + list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) { if (ULONG_CMP_GE(ck->btree_trans_barrier_seq, pos->btree_trans_barrier_seq)) { @@ -158,6 +163,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, #else mutex_lock(&bc->lock); list_move_tail(&ck->list, &bc->freed_nonpcpu); + bc->nr_freed_nonpcpu++; mutex_unlock(&bc->lock); #endif } else { @@ -217,6 +223,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, f->nr < ARRAY_SIZE(f->objs) / 2) { ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); list_del_init(&ck->list); + bc->nr_freed_nonpcpu--; f->objs[f->nr++] = ck; } @@ -229,6 +236,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, if (!list_empty(&bc->freed_nonpcpu)) { ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); list_del_init(&ck->list); + bc->nr_freed_nonpcpu--; } mutex_unlock(&bc->lock); #endif @@ -664,7 +672,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, goto out; bch2_journal_pin_drop(j, &ck->journal); - bch2_journal_preres_put(j, &ck->res); BUG_ON(!btree_node_locked(c_iter.path, 0)); @@ -762,18 +769,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans, BUG_ON(insert->k.u64s > ck->u64s); - if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) { - int difference; - - BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s); - - difference = jset_u64s(insert->k.u64s) - ck->res.u64s; - if (difference > 0) { - trans->journal_preres.u64s -= difference; - ck->res.u64s += difference; - } - } - bkey_copy(ck->k, insert); ck->valid = true; @@ -850,6 +845,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, * Newest freed entries are at the end of the list - once we hit one * that's too new to be freed, we can bail out: */ + scanned += bc->nr_freed_nonpcpu; + list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -859,13 +856,15 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); atomic_long_dec(&bc->nr_freed); - scanned++; freed++; + bc->nr_freed_nonpcpu--; } if (scanned >= nr) goto out; + scanned += bc->nr_freed_pcpu; + list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -875,8 +874,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, six_lock_exit(&ck->c.lock); kmem_cache_free(bch2_key_cache, ck); atomic_long_dec(&bc->nr_freed); - scanned++; freed++; + bc->nr_freed_pcpu--; } if (scanned >= nr) @@ -982,6 +981,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) } #endif + BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu); + BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu); + list_splice(&bc->freed_pcpu, &items); list_splice(&bc->freed_nonpcpu, &items); @@ -991,7 +993,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) cond_resched(); bch2_journal_pin_drop(&c->journal, &ck->journal); - bch2_journal_preres_put(&c->journal, &ck->res); list_del(&ck->list); kfree(ck->k); diff --git a/fs/bcachefs/btree_key_cache_types.h b/fs/bcachefs/btree_key_cache_types.h new file mode 100644 index 0000000000000000000000000000000000000000..290e4e57df5bbcfeffb38d666aa18c89a7c1c5a6 --- /dev/null +++ b/fs/bcachefs/btree_key_cache_types.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H +#define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H + +struct btree_key_cache_freelist { + struct bkey_cached *objs[16]; + unsigned nr; +}; + +struct btree_key_cache { + struct mutex lock; + struct rhashtable table; + bool table_init_done; + + struct list_head freed_pcpu; + size_t nr_freed_pcpu; + struct list_head freed_nonpcpu; + size_t nr_freed_nonpcpu; + + struct shrinker *shrink; + unsigned shrink_iter; + struct btree_key_cache_freelist __percpu *pcpu_freed; + + atomic_long_t nr_freed; + atomic_long_t nr_keys; + atomic_long_t nr_dirty; +}; + +struct bkey_cached_key { + u32 btree_id; + struct bpos pos; +} __packed __aligned(4); + +#endif /* _BCACHEFS_BTREE_KEY_CACHE_TYPES_H */ diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index decad7b66c59c114a9315d7acd1ec3bf755a4230..12907beda98c2b9d259e7896b79867adbbb9a88e 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -78,6 +78,53 @@ inline void bch2_btree_node_prep_for_write(struct btree_trans *trans, bch2_btree_init_next(trans, b); } +static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i) +{ + while (--i >= trans->updates) { + if (same_leaf_as_prev(trans, i)) + continue; + + bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b); + } + + trace_and_count(trans->c, trans_restart_would_deadlock_write, trans); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write); +} + +static inline int bch2_trans_lock_write(struct btree_trans *trans) +{ + struct btree_insert_entry *i; + + EBUG_ON(trans->write_locked); + + trans_for_each_update(trans, i) { + if (same_leaf_as_prev(trans, i)) + continue; + + if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c)) + return trans_lock_write_fail(trans, i); + + if (!i->cached) + bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b); + } + + trans->write_locked = true; + return 0; +} + +static inline void bch2_trans_unlock_write(struct btree_trans *trans) +{ + if (likely(trans->write_locked)) { + struct btree_insert_entry *i; + + trans_for_each_update(trans, i) + if (!same_leaf_as_prev(trans, i)) + bch2_btree_node_unlock_write_inlined(trans, i->path, + insert_l(i)->b); + trans->write_locked = false; + } +} + /* Inserting into a given leaf node (last stage of insert): */ /* Handle overwrites and do insert, for non extents: */ @@ -276,17 +323,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot)); } -static noinline int -bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags, - unsigned long trace_ip) -{ - return drop_locks_do(trans, - bch2_journal_preres_get(&trans->c->journal, - &trans->journal_preres, - trans->journal_preres_u64s, - (flags & BCH_WATERMARK_MASK))); -} - static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, unsigned flags) { @@ -321,6 +357,45 @@ static inline int btree_key_can_insert(struct btree_trans *trans, return 0; } +noinline static int +btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags, + struct btree_path *path, unsigned new_u64s) +{ + struct bch_fs *c = trans->c; + struct btree_insert_entry *i; + struct bkey_cached *ck = (void *) path->l[0].b; + struct bkey_i *new_k; + int ret; + + bch2_trans_unlock_write(trans); + bch2_trans_unlock(trans); + + new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); + if (!new_k) { + bch_err(c, "error allocating memory for key cache key, btree %s u64s %u", + bch2_btree_id_str(path->btree_id), new_u64s); + return -BCH_ERR_ENOMEM_btree_key_cache_insert; + } + + ret = bch2_trans_relock(trans) ?: + bch2_trans_lock_write(trans); + if (unlikely(ret)) { + kfree(new_k); + return ret; + } + + memcpy(new_k, ck->k, ck->u64s * sizeof(u64)); + + trans_for_each_update(trans, i) + if (i->old_v == &ck->k->v) + i->old_v = &new_k->v; + + kfree(ck->k); + ck->u64s = new_u64s; + ck->k = new_k; + return 0; +} + static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags, struct btree_path *path, unsigned u64s) { @@ -347,12 +422,9 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags return 0; new_u64s = roundup_pow_of_two(u64s); - new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS); - if (!new_k) { - bch_err(c, "error allocating memory for key cache key, btree %s u64s %u", - bch2_btree_id_str(path->btree_id), new_u64s); - return -BCH_ERR_ENOMEM_btree_key_cache_insert; - } + new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT); + if (unlikely(!new_k)) + return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s); trans_for_each_update(trans, i) if (i->old_v == &ck->k->v) @@ -732,37 +804,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, return ret; } -static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i) -{ - while (--i >= trans->updates) { - if (same_leaf_as_prev(trans, i)) - continue; - - bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b); - } - - trace_and_count(trans->c, trans_restart_would_deadlock_write, trans); - return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write); -} - -static inline int trans_lock_write(struct btree_trans *trans) -{ - struct btree_insert_entry *i; - - trans_for_each_update(trans, i) { - if (same_leaf_as_prev(trans, i)) - continue; - - if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c)) - return trans_lock_write_fail(trans, i); - - if (!i->cached) - bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b); - } - - return 0; -} - static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans) { struct btree_insert_entry *i; @@ -830,15 +871,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags } } - ret = bch2_journal_preres_get(&c->journal, - &trans->journal_preres, trans->journal_preres_u64s, - (flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK); - if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked)) - ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip); - if (unlikely(ret)) - return ret; - - ret = trans_lock_write(trans); + ret = bch2_trans_lock_write(trans); if (unlikely(ret)) return ret; @@ -847,10 +880,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags if (!ret && unlikely(trans->journal_replay_not_finished)) bch2_drop_overwrites_from_journal(trans); - trans_for_each_update(trans, i) - if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_unlock_write_inlined(trans, i->path, - insert_l(i)->b); + bch2_trans_unlock_write(trans); if (!ret && trans->journal_pin) bch2_journal_pin_add(&c->journal, trans->journal_res.seq, @@ -1003,7 +1033,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) struct bch_fs *c = trans->c; struct btree_insert_entry *i = NULL; struct btree_write_buffered_key *wb; - unsigned u64s; int ret = 0; if (!trans->nr_updates && @@ -1063,13 +1092,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); - memset(&trans->journal_preres, 0, sizeof(trans->journal_preres)); - trans->journal_u64s = trans->extra_journal_entries.nr; - trans->journal_preres_u64s = 0; - trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); - if (trans->journal_transaction_names) trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); @@ -1085,16 +1109,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (i->key_cache_already_flushed) continue; - /* we're going to journal the key being updated: */ - u64s = jset_u64s(i->k->k.u64s); - if (i->cached && - likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) - trans->journal_preres_u64s += u64s; - if (i->flags & BTREE_UPDATE_NOJOURNAL) continue; - trans->journal_u64s += u64s; + /* we're going to journal the key being updated: */ + trans->journal_u64s += jset_u64s(i->k->k.u64s); /* and we're also going to log the overwrite: */ if (trans->journal_transaction_names) @@ -1126,8 +1145,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) trace_and_count(c, transaction_commit, trans, _RET_IP_); out: - bch2_journal_preres_put(&c->journal, &trans->journal_preres); - if (likely(!(flags & BTREE_INSERT_NOCHECK_RW))) bch2_write_ref_put(c, BCH_WRITE_REF_trans); out_reset: diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 941841a0c5bf68c56370cf2fadf6669351d582d8..60453ba86c4b963777f67693352d4929ac726549 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -5,7 +5,7 @@ #include <linux/list.h> #include <linux/rhashtable.h> -//#include "bkey_methods.h" +#include "btree_key_cache_types.h" #include "buckets_types.h" #include "darray.h" #include "errcode.h" @@ -312,31 +312,6 @@ struct btree_iter { #endif }; -struct btree_key_cache_freelist { - struct bkey_cached *objs[16]; - unsigned nr; -}; - -struct btree_key_cache { - struct mutex lock; - struct rhashtable table; - bool table_init_done; - struct list_head freed_pcpu; - struct list_head freed_nonpcpu; - struct shrinker *shrink; - unsigned shrink_iter; - struct btree_key_cache_freelist __percpu *pcpu_freed; - - atomic_long_t nr_freed; - atomic_long_t nr_keys; - atomic_long_t nr_dirty; -}; - -struct bkey_cached_key { - u32 btree_id; - struct bpos pos; -} __packed __aligned(4); - #define BKEY_CACHED_ACCESSED 0 #define BKEY_CACHED_DIRTY 1 @@ -352,7 +327,6 @@ struct bkey_cached { struct rhash_head hash; struct list_head list; - struct journal_preres res; struct journal_entry_pin journal; u64 seq; @@ -389,11 +363,7 @@ struct btree_insert_entry { unsigned long ip_allocated; }; -#ifndef CONFIG_LOCKDEP #define BTREE_ITER_MAX 64 -#else -#define BTREE_ITER_MAX 32 -#endif struct btree_trans_commit_hook; typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *); @@ -434,6 +404,7 @@ struct btree_trans { bool journal_transaction_names:1; bool journal_replay_not_finished:1; bool notrace_relock_fail:1; + bool write_locked:1; enum bch_errcode restarted:16; u32 restart_count; unsigned long last_begin_ip; @@ -465,11 +436,9 @@ struct btree_trans { struct journal_entry_pin *journal_pin; struct journal_res journal_res; - struct journal_preres journal_preres; u64 *journal_seq; struct disk_reservation *disk_res; unsigned journal_u64s; - unsigned journal_preres_u64s; struct replicas_delta_list *fs_usage_deltas; }; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 39c2db68123bd1e7958cb69540721a1548d92516..76f27bc9fa24e0a57ebbf1f70b5bfbb1f8a79ee6 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -513,8 +513,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans * up_read(&c->gc_lock); as->took_gc_lock = false; - bch2_journal_preres_put(&c->journal, &as->journal_preres); - bch2_journal_pin_drop(&c->journal, &as->journal); bch2_journal_pin_flush(&c->journal, &as->journal); bch2_disk_reservation_put(c, &as->disk_res); @@ -734,8 +732,6 @@ static void btree_update_nodes_written(struct btree_update *as) bch2_journal_pin_drop(&c->journal, &as->journal); - bch2_journal_preres_put(&c->journal, &as->journal_preres); - mutex_lock(&c->btree_interior_update_lock); for (i = 0; i < as->nr_new_nodes; i++) { b = as->new_nodes[i]; @@ -1047,7 +1043,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, unsigned nr_nodes[2] = { 0, 0 }; unsigned update_level = level; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; - unsigned journal_flags = 0; int ret = 0; u32 restart_count = trans->restart_count; @@ -1061,10 +1056,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, flags &= ~BCH_WATERMARK_MASK; flags |= watermark; - if (flags & BTREE_INSERT_JOURNAL_RECLAIM) - journal_flags |= JOURNAL_RES_GET_NONBLOCK; - journal_flags |= watermark; - while (1) { nr_nodes[!!update_level] += 1 + split; update_level++; @@ -1129,27 +1120,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (ret) goto err; - ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, - BTREE_UPDATE_JOURNAL_RES, - journal_flags|JOURNAL_RES_GET_NONBLOCK); - if (ret) { - if (flags & BTREE_INSERT_JOURNAL_RECLAIM) { - ret = -BCH_ERR_journal_reclaim_would_deadlock; - goto err; - } - - ret = drop_locks_do(trans, - bch2_journal_preres_get(&c->journal, &as->journal_preres, - BTREE_UPDATE_JOURNAL_RES, - journal_flags)); - if (ret == -BCH_ERR_journal_preres_get_blocked) { - trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags); - ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); - } - if (ret) - goto err; - } - ret = bch2_disk_reservation_get(c, &as->disk_res, (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c), c->opts.metadata_replicas, diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 4df21512d640dac83c8948137dfa4fd077b2ef39..031076e75fa1322a82a202e150a8eca9a75c063e 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -55,7 +55,6 @@ struct btree_update { unsigned update_level; struct disk_reservation disk_res; - struct journal_preres journal_preres; /* * BTREE_INTERIOR_UPDATING_NODE: diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0771a6d880bf5e2e4efcbcc21d91d34b64160dd4..5ed66202c22654a71b79fde919f6dd425e472874 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -239,6 +239,34 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, next_pos = insert->k.p; + /* + * Check for nonce offset inconsistency: + * This is debug code - we've been seeing this bug rarely, and + * it's been hard to reproduce, so this should give us some more + * information when it does occur: + */ + struct printbuf err = PRINTBUF; + int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err); + printbuf_exit(&err); + + if (invalid) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "about to insert invalid key in data update path"); + prt_str(&buf, "\nold: "); + bch2_bkey_val_to_text(&buf, c, old); + prt_str(&buf, "\nk: "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\nnew: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + + bch2_print_string_as_lines(KERN_ERR, buf.buf); + printbuf_exit(&buf); + + bch2_fatal_error(c); + goto out; + } + ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index d613695abf9f67c2e9f2ab4ce91d863bdfd743c7..4d0cb0ccff32f2c75fa66f932f517f00b9cfdf25 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -555,6 +555,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) case TARGET_DEV: { struct bch_dev *ca; + out->atomic++; rcu_read_lock(); ca = t.dev < c->sb.nr_devices ? rcu_dereference(c->devs[t.dev]) @@ -570,6 +571,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) } rcu_read_unlock(); + out->atomic--; break; } case TARGET_GROUP: @@ -580,7 +582,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) } } -void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) +static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) { struct target t = target_decode(v); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 875f7c5a6fca63337a6be502daf2dda5a48844ea..2a77de18c004e77041049b763d277028856b7da6 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1373,6 +1373,15 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, h->nr_active_devs++; rcu_read_unlock(); + + /* + * If we only have redundancy + 1 devices, we're better off with just + * replication: + */ + if (h->nr_active_devs < h->redundancy + 2) + bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?", + h->nr_active_devs, h->redundancy + 2); + list_add(&h->list, &c->ec_stripe_head_list); return h; } @@ -1424,6 +1433,11 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark); found: + if (!IS_ERR_OR_NULL(h) && + h->nr_active_devs < h->redundancy + 2) { + mutex_unlock(&h->lock); + h = NULL; + } mutex_unlock(&c->ec_stripe_head_lock); return h; } @@ -1681,8 +1695,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, int ret; h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark); - if (!h) - bch_err(c, "no stripe head"); if (IS_ERR_OR_NULL(h)) return h; diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c index 8bd9bcdd27f738a7a2f0d2ac831f0c77fdf20aa3..ff664fd0d8ef80e8b4816d7c430e87d41759b498 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c @@ -13,7 +13,7 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping, loff_t start, u64 end, - int fgp_flags, gfp_t gfp, + fgf_t fgp_flags, gfp_t gfp, folios *fs) { struct folio *f; diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h index a2222ad586e9e7530728507516abc33da4b0c128..27f712ae37a68209275cc3b2955a542314e80e68 100644 --- a/fs/bcachefs/fs-io-pagecache.h +++ b/fs/bcachefs/fs-io-pagecache.h @@ -7,7 +7,7 @@ typedef DARRAY(struct folio *) folios; int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t, - u64, int, gfp_t, folios *); + u64, fgf_t, gfp_t, folios *); int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t); /* diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 166d8d8abe683f1b05ddd0115763c79015208fa1..8ef817304e4a2eed0a70a6e5f38e62683e22705e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1922,10 +1922,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, return dget(sb->s_root); err_put_super: - sb->s_fs_info = NULL; - c->vfs_sb = NULL; deactivate_locked_super(sb); - bch2_fs_stop(c); return ERR_PTR(bch2_err_class(ret)); } @@ -1933,11 +1930,8 @@ static void bch2_kill_sb(struct super_block *sb) { struct bch_fs *c = sb->s_fs_info; - if (c) - c->vfs_sb = NULL; generic_shutdown_super(sb); - if (c) - bch2_fs_free(c); + bch2_fs_free(c); } static struct file_system_type bcache_fs_type = { diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9f3e9bd3d767a75fb1a0734c0413193a671f3206..e0c5cd119acc938a5bfe3ff2be8cac2cf1504b11 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2220,7 +2220,7 @@ static int nlink_cmp(const void *_l, const void *_r) const struct nlink *l = _l; const struct nlink *r = _r; - return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot); + return cmp_int(l->inum, r->inum); } static void inc_link(struct bch_fs *c, struct snapshots_seen *s, diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index def77f2d88024b788b6ce7c03bda07827f73a621..c7849b0753e7a115d563aa4cfdf5bdf33b319ec0 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1134,7 +1134,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, * unlinked inodes in the snapshot leaves: */ *need_another_pass = true; - return 0; + goto out; } ret = 1; @@ -1169,8 +1169,10 @@ int bch2_delete_dead_inodes(struct bch_fs *c) */ for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p, - &need_another_pass)); + ret = commit_do(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass)); if (ret < 0) break; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index f02b3f7d26a016594c3de2a8f25b0006638b91c2..d704a8f829c8a7fa3db1cc481857dd431b67190b 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -795,7 +795,7 @@ static int bch2_write_decrypt(struct bch_write_op *op) * checksum: */ csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); - if (bch2_crc_cmp(op->crc.csum, csum)) + if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) return -EIO; ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 5b5d69f2316b216746c0c08db2346c2c8c95ff16..23a9b7845d11975313414ef8a24bad4345a0d605 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -526,36 +526,6 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, return ret; } -/* journal_preres: */ - -static bool journal_preres_available(struct journal *j, - struct journal_preres *res, - unsigned new_u64s, - unsigned flags) -{ - bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true); - - if (!ret && mutex_trylock(&j->reclaim_lock)) { - bch2_journal_reclaim(j); - mutex_unlock(&j->reclaim_lock); - } - - return ret; -} - -int __bch2_journal_preres_get(struct journal *j, - struct journal_preres *res, - unsigned new_u64s, - unsigned flags) -{ - int ret; - - closure_wait_event(&j->preres_wait, - (ret = bch2_journal_error(j)) || - journal_preres_available(j, res, new_u64s, flags)); - return ret; -} - /* journal_entry_res: */ void bch2_journal_entry_res_resize(struct journal *j, @@ -1306,7 +1276,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk); prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk); - prt_printf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining); prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]); prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved); prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 011711e99c8d825ec968cf513f82c08a66ecabc5..c85d01cf49484984d08d20a2159f84b2506f96a1 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -395,104 +395,6 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re return 0; } -/* journal_preres: */ - -static inline void journal_set_watermark(struct journal *j) -{ - union journal_preres_state s = READ_ONCE(j->prereserved); - unsigned watermark = BCH_WATERMARK_stripe; - - if (fifo_free(&j->pin) < j->pin.size / 4) - watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc); - if (fifo_free(&j->pin) < j->pin.size / 8) - watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim); - - if (s.reserved > s.remaining) - watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc); - if (!s.remaining) - watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim); - - if (watermark == j->watermark) - return; - - swap(watermark, j->watermark); - if (watermark > j->watermark) - journal_wake(j); -} - -static inline void bch2_journal_preres_put(struct journal *j, - struct journal_preres *res) -{ - union journal_preres_state s = { .reserved = res->u64s }; - - if (!res->u64s) - return; - - s.v = atomic64_sub_return(s.v, &j->prereserved.counter); - res->u64s = 0; - - if (unlikely(s.waiting)) { - clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)), - (unsigned long *) &j->prereserved.v); - closure_wake_up(&j->preres_wait); - } - - if (s.reserved <= s.remaining && j->watermark) - journal_set_watermark(j); -} - -int __bch2_journal_preres_get(struct journal *, - struct journal_preres *, unsigned, unsigned); - -static inline int bch2_journal_preres_get_fast(struct journal *j, - struct journal_preres *res, - unsigned new_u64s, - unsigned flags, - bool set_waiting) -{ - int d = new_u64s - res->u64s; - union journal_preres_state old, new; - u64 v = atomic64_read(&j->prereserved.counter); - enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; - int ret; - - do { - old.v = new.v = v; - ret = 0; - - if (watermark == BCH_WATERMARK_reclaim || - new.reserved + d < new.remaining) { - new.reserved += d; - ret = 1; - } else if (set_waiting && !new.waiting) - new.waiting = true; - else - return 0; - } while ((v = atomic64_cmpxchg(&j->prereserved.counter, - old.v, new.v)) != old.v); - - if (ret) - res->u64s += d; - return ret; -} - -static inline int bch2_journal_preres_get(struct journal *j, - struct journal_preres *res, - unsigned new_u64s, - unsigned flags) -{ - if (new_u64s <= res->u64s) - return 0; - - if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false)) - return 0; - - if (flags & JOURNAL_RES_GET_NONBLOCK) - return -BCH_ERR_journal_preres_get_blocked; - - return __bch2_journal_preres_get(j, res, new_u64s, flags); -} - /* journal_entry_res: */ void bch2_journal_entry_res_resize(struct journal *, diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index f4bc2cdbfdd7921b4d562cf0df7d29b1f8c51c87..786a0928550920a906197fff61d51eccaa6126bb 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1079,6 +1079,12 @@ static void bch2_journal_read_device(struct closure *cl) if (ja->bucket_seq[ja->cur_idx] && ja->sectors_free == ca->mi.bucket_size) { +#if 0 + /* + * Debug code for ZNS support, where we (probably) want to be + * correlated where we stopped in the journal to the zone write + * points: + */ bch_err(c, "ja->sectors_free == ca->mi.bucket_size"); bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr); for (i = 0; i < 3; i++) { @@ -1086,6 +1092,7 @@ static void bch2_journal_read_device(struct closure *cl) bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]); } +#endif ja->sectors_free = 0; } diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 9a584aaaa2eba9abadc7f2016a20c70834e0610c..e63c6eda86afeb9e9c0920554e9bef953b0a9a26 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -50,16 +50,21 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j, return available; } -static void journal_set_remaining(struct journal *j, unsigned u64s_remaining) +static inline void journal_set_watermark(struct journal *j, bool low_on_space) { - union journal_preres_state old, new; - u64 v = atomic64_read(&j->prereserved.counter); + unsigned watermark = BCH_WATERMARK_stripe; - do { - old.v = new.v = v; - new.remaining = u64s_remaining; - } while ((v = atomic64_cmpxchg(&j->prereserved.counter, - old.v, new.v)) != old.v); + if (low_on_space) + watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim); + if (fifo_free(&j->pin) < j->pin.size / 4) + watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim); + + if (watermark == j->watermark) + return; + + swap(watermark, j->watermark); + if (watermark > j->watermark) + journal_wake(j); } static struct journal_space @@ -162,7 +167,6 @@ void bch2_journal_space_available(struct journal *j) struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_dev *ca; unsigned clean, clean_ondisk, total; - s64 u64s_remaining = 0; unsigned max_entry_size = min(j->buf[0].buf_size >> 9, j->buf[1].buf_size >> 9); unsigned i, nr_online = 0, nr_devs_want; @@ -222,16 +226,10 @@ void bch2_journal_space_available(struct journal *j) else clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); - u64s_remaining = (u64) clean << 6; - u64s_remaining -= (u64) total << 3; - u64s_remaining = max(0LL, u64s_remaining); - u64s_remaining /= 4; - u64s_remaining = min_t(u64, u64s_remaining, U32_MAX); + journal_set_watermark(j, clean * 4 <= total); out: j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0; j->cur_entry_error = ret; - journal_set_remaining(j, u64s_remaining); - journal_set_watermark(j); if (!ret) journal_wake(j); @@ -555,11 +553,6 @@ static u64 journal_seq_to_flush(struct journal *j) /* Try to keep the journal at most half full: */ nr_buckets = ja->nr / 2; - /* And include pre-reservations: */ - nr_buckets += DIV_ROUND_UP(j->prereserved.reserved, - (ca->mi.bucket_size << 6) - - journal_entry_overhead(j)); - nr_buckets = min(nr_buckets, ja->nr); bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr; @@ -638,10 +631,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked) msecs_to_jiffies(c->opts.journal_reclaim_delay))) min_nr = 1; - if (j->prereserved.reserved * 4 > j->prereserved.remaining) - min_nr = 1; - - if (fifo_free(&j->pin) <= 32) + if (j->watermark != BCH_WATERMARK_stripe) min_nr = 1; if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used) @@ -652,8 +642,6 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked) trace_and_count(c, journal_reclaim_start, c, direct, kicked, min_nr, min_key_cache, - j->prereserved.reserved, - j->prereserved.remaining, atomic_read(&c->btree_cache.dirty), c->btree_cache.used, atomic_long_read(&c->btree_key_cache.nr_dirty), diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 42504e16acb6ccf261a6699b6d468cba7d26a776..a756b69582e34955ecfe86fbaa688785aeca532f 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -76,14 +76,6 @@ struct journal_res { u64 seq; }; -/* - * For reserving space in the journal prior to getting a reservation on a - * particular journal entry: - */ -struct journal_preres { - unsigned u64s; -}; - union journal_res_state { struct { atomic64_t counter; @@ -104,22 +96,6 @@ union journal_res_state { }; }; -union journal_preres_state { - struct { - atomic64_t counter; - }; - - struct { - u64 v; - }; - - struct { - u64 waiting:1, - reserved:31, - remaining:32; - }; -}; - /* bytes: */ #define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */ #define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */ @@ -180,8 +156,6 @@ struct journal { union journal_res_state reservations; enum bch_watermark watermark; - union journal_preres_state prereserved; - } __aligned(SMP_CACHE_BYTES); unsigned long flags; diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index b775cf0fb7cbf211a3f388cf78de0de6f33c581c..97790445e67ad2923fc4a0413d2c824cf506455e 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -163,8 +163,11 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type, this_cpu_sub(*lock->readers, !ret); preempt_enable(); - if (!ret && (old & SIX_LOCK_WAITING_write)) - ret = -1 - SIX_LOCK_write; + if (!ret) { + smp_mb(); + if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write) + ret = -1 - SIX_LOCK_write; + } } else if (type == SIX_LOCK_write && lock->readers) { if (try) { atomic_add(SIX_LOCK_HELD_write, &lock->state); diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h index 86833445af205643b81bd08b3b204005c7bee071..2d2e66a4e4681ee5ba6ba18666d135ab961a2cbf 100644 --- a/fs/bcachefs/subvolume_types.h +++ b/fs/bcachefs/subvolume_types.h @@ -20,7 +20,7 @@ struct snapshot_t { }; struct snapshot_table { - struct snapshot_t s[0]; + DECLARE_FLEX_ARRAY(struct snapshot_t, s); }; typedef struct { diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 893304a1f06e6ea03df55020cf7be26f349d8cfe..7857671159b491235a0bcdfe19f9b34a316a0126 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -196,10 +196,9 @@ DEFINE_EVENT(bio, journal_write, TRACE_EVENT(journal_reclaim_start, TP_PROTO(struct bch_fs *c, bool direct, bool kicked, u64 min_nr, u64 min_key_cache, - u64 prereserved, u64 prereserved_total, u64 btree_cache_dirty, u64 btree_cache_total, u64 btree_key_cache_dirty, u64 btree_key_cache_total), - TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total, + TP_ARGS(c, direct, kicked, min_nr, min_key_cache, btree_cache_dirty, btree_cache_total, btree_key_cache_dirty, btree_key_cache_total), @@ -209,8 +208,6 @@ TRACE_EVENT(journal_reclaim_start, __field(bool, kicked ) __field(u64, min_nr ) __field(u64, min_key_cache ) - __field(u64, prereserved ) - __field(u64, prereserved_total ) __field(u64, btree_cache_dirty ) __field(u64, btree_cache_total ) __field(u64, btree_key_cache_dirty ) @@ -223,22 +220,18 @@ TRACE_EVENT(journal_reclaim_start, __entry->kicked = kicked; __entry->min_nr = min_nr; __entry->min_key_cache = min_key_cache; - __entry->prereserved = prereserved; - __entry->prereserved_total = prereserved_total; __entry->btree_cache_dirty = btree_cache_dirty; __entry->btree_cache_total = btree_cache_total; __entry->btree_key_cache_dirty = btree_key_cache_dirty; __entry->btree_key_cache_total = btree_key_cache_total; ), - TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu", + TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->direct, __entry->kicked, __entry->min_nr, __entry->min_key_cache, - __entry->prereserved, - __entry->prereserved_total, __entry->btree_cache_dirty, __entry->btree_cache_total, __entry->btree_key_cache_dirty, diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index a39ff0c296ecfb2a000edd6aace20bdbb8db20ea..79d982674c180307f5d5a4da42fabaa480878573 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -552,6 +552,14 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, s.v = v + 1; s.defined = true; } else { + /* + * Check if this option was set on the parent - if so, switched + * back to inheriting from the parent: + * + * rename() also has to deal with keeping inherited options up + * to date - see bch2_reinherit_attrs() + */ + spin_lock(&dentry->d_lock); if (!IS_ROOT(dentry)) { struct bch_inode_info *dir = to_bch_ei(d_inode(dentry->d_parent)); @@ -560,6 +568,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, } else { s.v = 0; } + spin_unlock(&dentry->d_lock); s.defined = false; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2a9344a3fcee929d971aa065e2400e8b431dff82..35c1d24d4a78424c0efba85fa2837f64da304d8f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -432,7 +432,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (btrfs_block_can_be_shared(trans, root, buf)) { ret = btrfs_lookup_extent_info(trans, fs_info, buf->start, btrfs_header_level(buf), 1, - &refs, &flags); + &refs, &flags, NULL); if (ret) return ret; if (unlikely(refs == 0)) { diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 9223934d95f4724cbbc4e726306049d444112ee1..891ea2fa263c935707be62167f59c2c1474e9a8d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -1041,7 +1041,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, return -ENOMEM; } - if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) { + if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); @@ -1144,7 +1144,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, return -ENOMEM; } - if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) { + if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c8e5b4715b495cbbcb536171fee0537be9b6505b..0455935ff558804b3e47291821a566e348d8a6d4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -102,7 +102,8 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len) */ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 offset, int metadata, u64 *refs, u64 *flags) + u64 offset, int metadata, u64 *refs, u64 *flags, + u64 *owning_root) { struct btrfs_root *extent_root; struct btrfs_delayed_ref_head *head; @@ -114,6 +115,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u32 item_size; u64 num_refs; u64 extent_flags; + u64 owner = 0; int ret; /* @@ -167,6 +169,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_extent_item); num_refs = btrfs_extent_refs(leaf, ei); extent_flags = btrfs_extent_flags(leaf, ei); + owner = btrfs_get_extent_owner_root(fs_info, leaf, + path->slots[0]); } else { ret = -EUCLEAN; btrfs_err(fs_info, @@ -226,6 +230,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, *refs = num_refs; if (flags) *flags = extent_flags; + if (owning_root) + *owning_root = owner; out_free: btrfs_free_path(path); return ret; @@ -5234,7 +5240,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, /* We don't lock the tree block, it's OK to be racy here */ ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, wc->level - 1, 1, &refs, - &flags); + &flags, NULL); /* We don't care about errors in readahead. */ if (ret < 0) continue; @@ -5301,7 +5307,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, fs_info, eb->start, level, 1, &wc->refs[level], - &wc->flags[level]); + &wc->flags[level], + NULL); BUG_ON(ret == -ENOMEM); if (ret) return ret; @@ -5391,6 +5398,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, u64 bytenr; u64 generation; u64 parent; + u64 owner_root = 0; struct btrfs_tree_parent_check check = { 0 }; struct btrfs_key key; struct btrfs_ref ref = { 0 }; @@ -5434,7 +5442,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1, &wc->refs[level - 1], - &wc->flags[level - 1]); + &wc->flags[level - 1], + &owner_root); if (ret < 0) goto out_unlock; @@ -5567,8 +5576,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, find_next_key(path, level, &wc->drop_progress); btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, - fs_info->nodesize, parent, - btrfs_header_owner(next)); + fs_info->nodesize, parent, owner_root); btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid, 0, false); ret = btrfs_free_extent(trans, &ref); @@ -5635,7 +5643,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, fs_info, eb->start, level, 1, &wc->refs[level], - &wc->flags[level]); + &wc->flags[level], + NULL); if (ret < 0) { btrfs_tree_unlock_rw(eb, path->locks[level]); path->locks[level] = 0; @@ -5880,7 +5889,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) ret = btrfs_lookup_extent_info(trans, fs_info, path->nodes[level]->start, level, 1, &wc->refs[level], - &wc->flags[level]); + &wc->flags[level], NULL); if (ret < 0) { err = ret; goto out_end_trans; diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h index 0716f65d9753bb91346dad8a724d40ff92c060b6..2e066035cceeeab346aad602a59433533f1de365 100644 --- a/fs/btrfs/extent-tree.h +++ b/fs/btrfs/extent-tree.h @@ -99,7 +99,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 offset, int metadata, u64 *refs, u64 *flags); + u64 offset, int metadata, u64 *refs, u64 *flags, + u64 *owner_root); int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, int reserved); int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5e3fccddde0c618e19567ea3138cd5980a88758d..9f5a9894f88f49156e9ddb7c4baac8ef1be252df 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6983,8 +6983,15 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, int ret; alloc_hint = get_extent_allocation_hint(inode, start, len); +again: ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize, 0, alloc_hint, &ins, 1, 1); + if (ret == -EAGAIN) { + ASSERT(btrfs_is_zoned(fs_info)); + wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH, + TASK_UNINTERRUPTIBLE); + goto again; + } if (ret) return ERR_PTR(ret); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 752acff2c73436ae844958cf6b4889b5d964dddd..dfe257e1845b6f4c4c72f15710758d12bd419af3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1528,7 +1528,7 @@ static noinline int key_in_sk(struct btrfs_key *key, static noinline int copy_to_sk(struct btrfs_path *path, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, - size_t *buf_size, + u64 *buf_size, char __user *ubuf, unsigned long *sk_offset, int *num_found) @@ -1660,7 +1660,7 @@ static noinline int copy_to_sk(struct btrfs_path *path, static noinline int search_ioctl(struct inode *inode, struct btrfs_ioctl_search_key *sk, - size_t *buf_size, + u64 *buf_size, char __user *ubuf) { struct btrfs_fs_info *info = btrfs_sb(inode->i_sb); @@ -1733,7 +1733,7 @@ static noinline int btrfs_ioctl_tree_search(struct inode *inode, struct btrfs_ioctl_search_args __user *uargs = argp; struct btrfs_ioctl_search_key sk; int ret; - size_t buf_size; + u64 buf_size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1763,8 +1763,8 @@ static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode, struct btrfs_ioctl_search_args_v2 __user *uarg = argp; struct btrfs_ioctl_search_args_v2 args; int ret; - size_t buf_size; - const size_t buf_limit = SZ_16M; + u64 buf_size; + const u64 buf_limit = SZ_16M; if (!capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index edb84cc032377330bac7742d8a03424fa2e48061..ce446d9d7f23da3f44cb9c7100c8e0d7b3866684 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1888,7 +1888,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, u64 bytenr = record->bytenr; if (!btrfs_qgroup_full_accounting(fs_info)) - return 0; + return 1; lockdep_assert_held(&delayed_refs->lock); trace_btrfs_qgroup_trace_extent(fs_info, record); @@ -2874,13 +2874,19 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, qgroup_update_counters(fs_info, &qgroups, nr_old_roots, nr_new_roots, num_bytes, seq); + /* + * We're done using the iterator, release all its qgroups while holding + * fs_info->qgroup_lock so that we don't race with btrfs_remove_qgroup() + * and trigger use-after-free accesses to qgroups. + */ + qgroup_iterator_nested_clean(&qgroups); + /* * Bump qgroup_seq to avoid seq overlap */ fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; spin_unlock(&fs_info->qgroup_lock); out_free: - qgroup_iterator_nested_clean(&qgroups); ulist_free(old_roots); ulist_free(new_roots); return ret; diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 944e8f1862aaaa69aec78c0de74ad6e9b1f42de6..9589362acfbf9e2ec3c99fdc4b1fb056bf3ddbc7 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -145,7 +145,7 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, btrfs_put_bioc(bioc); } - return ret; + return 0; } int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9ce5be21b0360107d1a2416f06fa0cbfd1710030..f62a408671cbc3ba06bf244bb12de96bdfb4334a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1868,6 +1868,9 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group * */ ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES); + /* @found_logical_ret must be specified. */ + ASSERT(found_logical_ret); + stripe = &sctx->stripes[sctx->cur_stripe]; scrub_reset_stripe(stripe); ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path, @@ -1876,8 +1879,7 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group * /* Either >0 as no more extents or <0 for error. */ if (ret) return ret; - if (found_logical_ret) - *found_logical_ret = stripe->logical; + *found_logical_ret = stripe->logical; sctx->cur_stripe++; /* We filled one group, submit it. */ @@ -2080,7 +2082,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, /* Go through each extent items inside the logical range */ while (cur_logical < logical_end) { - u64 found_logical; + u64 found_logical = U64_MAX; u64 cur_physical = physical + cur_logical - logical_start; /* Canceled? */ @@ -2115,6 +2117,8 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, if (ret < 0) break; + /* queue_scrub_stripe() returned 0, @found_logical must be updated. */ + ASSERT(found_logical != U64_MAX); cur_logical = found_logical + BTRFS_STRIPE_LEN; /* Don't hold CPU for too long time */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c87e18827a0a6cc411cd9d01411efe816a10c047..c6f16625af51f5c9ede23329f16b9bca8031e4af 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -748,13 +748,13 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (!fs_devices) { fs_devices = alloc_fs_devices(disk_super->fsid); + if (IS_ERR(fs_devices)) + return ERR_CAST(fs_devices); + if (has_metadata_uuid) memcpy(fs_devices->metadata_uuid, disk_super->metadata_uuid, BTRFS_FSID_SIZE); - if (IS_ERR(fs_devices)) - return ERR_CAST(fs_devices); - if (same_fsid_diff_dev) { generate_random_uuid(fs_devices->fsid); fs_devices->temp_fsid = true; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 3504ade30cb0c3426bcd64c0418b326f1d7a6153..188378ca19c7f67a1f2c519f0a90629a4107fb5d 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1661,13 +1661,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } out: - if (cache->alloc_offset > fs_info->zone_size) { - btrfs_err(fs_info, - "zoned: invalid write pointer %llu in block group %llu", - cache->alloc_offset, cache->start); - ret = -EIO; - } - if (cache->alloc_offset > cache->zone_capacity) { btrfs_err(fs_info, "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 929248c6ca84c47f828feecbdcbbb18286d5819f..4cbe0434cbb8ce973865153ddf4a9d7c332bea5e 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -84,8 +84,8 @@ int nfsd_net_reply_cache_init(struct nfsd_net *nn); void nfsd_net_reply_cache_destroy(struct nfsd_net *nn); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); -int nfsd_cache_lookup(struct svc_rqst *rqstp, - struct nfsd_cacherep **cacherep); +int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, + unsigned int len, struct nfsd_cacherep **cacherep); void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp, int cachetype, __be32 *statp); int nfsd_reply_cache_stats_show(struct seq_file *m, void *v); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4045c852a450e7ab172f26ecfcfc5e2805bfb6df..40415929e2aef385fce9b658ba68b32425a6462f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2804,7 +2804,7 @@ static int client_opens_release(struct inode *inode, struct file *file) /* XXX: alternatively, we could get/drop in seq start/stop */ drop_client(clp); - return 0; + return seq_release(inode, file); } static const struct file_operations client_states_fops = { diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index fd56a52aa5fb678859e5fd12eb8e00492e671372..d3273a3966598b83e46679102fa9b7e00d879184 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -369,33 +369,52 @@ nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) return freed; } -/* - * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes +/** + * nfsd_cache_csum - Checksum incoming NFS Call arguments + * @buf: buffer containing a whole RPC Call message + * @start: starting byte of the NFS Call header + * @remaining: size of the NFS Call header, in bytes + * + * Compute a weak checksum of the leading bytes of an NFS procedure + * call header to help verify that a retransmitted Call matches an + * entry in the duplicate reply cache. + * + * To avoid assumptions about how the RPC message is laid out in + * @buf and what else it might contain (eg, a GSS MIC suffix), the + * caller passes us the exact location and length of the NFS Call + * header. + * + * Returns a 32-bit checksum value, as defined in RFC 793. */ -static __wsum -nfsd_cache_csum(struct svc_rqst *rqstp) +static __wsum nfsd_cache_csum(struct xdr_buf *buf, unsigned int start, + unsigned int remaining) { + unsigned int base, len; + struct xdr_buf subbuf; + __wsum csum = 0; + void *p; int idx; - unsigned int base; - __wsum csum; - struct xdr_buf *buf = &rqstp->rq_arg; - const unsigned char *p = buf->head[0].iov_base; - size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, - RC_CSUMLEN); - size_t len = min(buf->head[0].iov_len, csum_len); + + if (remaining > RC_CSUMLEN) + remaining = RC_CSUMLEN; + if (xdr_buf_subsegment(buf, &subbuf, start, remaining)) + return csum; /* rq_arg.head first */ - csum = csum_partial(p, len, 0); - csum_len -= len; + if (subbuf.head[0].iov_len) { + len = min_t(unsigned int, subbuf.head[0].iov_len, remaining); + csum = csum_partial(subbuf.head[0].iov_base, len, csum); + remaining -= len; + } /* Continue into page array */ - idx = buf->page_base / PAGE_SIZE; - base = buf->page_base & ~PAGE_MASK; - while (csum_len) { - p = page_address(buf->pages[idx]) + base; - len = min_t(size_t, PAGE_SIZE - base, csum_len); + idx = subbuf.page_base / PAGE_SIZE; + base = subbuf.page_base & ~PAGE_MASK; + while (remaining) { + p = page_address(subbuf.pages[idx]) + base; + len = min_t(unsigned int, PAGE_SIZE - base, remaining); csum = csum_partial(p, len, csum); - csum_len -= len; + remaining -= len; base = 0; ++idx; } @@ -466,6 +485,8 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key, /** * nfsd_cache_lookup - Find an entry in the duplicate reply cache * @rqstp: Incoming Call to find + * @start: starting byte in @rqstp->rq_arg of the NFS Call header + * @len: size of the NFS Call header, in bytes * @cacherep: OUT: DRC entry for this request * * Try to find an entry matching the current call in the cache. When none @@ -479,7 +500,8 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key, * %RC_REPLY: Reply from cache * %RC_DROPIT: Do not process the request further */ -int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep) +int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, + unsigned int len, struct nfsd_cacherep **cacherep) { struct nfsd_net *nn; struct nfsd_cacherep *rp, *found; @@ -495,7 +517,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep) goto out; } - csum = nfsd_cache_csum(rqstp); + csum = nfsd_cache_csum(&rqstp->rq_arg, start, len); /* * Since the common case is a cache miss followed by an insert, @@ -641,24 +663,17 @@ void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp, return; } -/* - * Copy cached reply to current reply buffer. Should always fit. - * FIXME as reply is in a page, we should just attach the page, and - * keep a refcount.... - */ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) { - struct kvec *vec = &rqstp->rq_res.head[0]; - - if (vec->iov_len + data->iov_len > PAGE_SIZE) { - printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n", - data->iov_len); - return 0; - } - memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len); - vec->iov_len += data->iov_len; - return 1; + __be32 *p; + + p = xdr_reserve_space(&rqstp->rq_res_stream, data->iov_len); + if (unlikely(!p)) + return false; + memcpy(p, data->iov_base, data->iov_len); + xdr_commit_encode(&rqstp->rq_res_stream); + return true; } /* diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d6122bb2d167b44b21e888ebb0fab8cda24c4cf8..fe61d9bbcc1faa2d704f9ec926812a022d78381d 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -981,6 +981,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp) const struct svc_procedure *proc = rqstp->rq_procinfo; __be32 *statp = rqstp->rq_accept_statp; struct nfsd_cacherep *rp; + unsigned int start, len; + __be32 *nfs_reply; /* * Give the xdr decoder a chance to change this if it wants @@ -988,6 +990,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp) */ rqstp->rq_cachetype = proc->pc_cachetype; + /* + * ->pc_decode advances the argument stream past the NFS + * Call header, so grab the header's starting location and + * size now for the call to nfsd_cache_lookup(). + */ + start = xdr_stream_pos(&rqstp->rq_arg_stream); + len = xdr_stream_remaining(&rqstp->rq_arg_stream); if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; @@ -1001,7 +1010,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1); rp = NULL; - switch (nfsd_cache_lookup(rqstp, &rp)) { + switch (nfsd_cache_lookup(rqstp, start, len, &rp)) { case RC_DOIT: break; case RC_REPLY: @@ -1010,6 +1019,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) goto out_dropit; } + nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0); *statp = proc->pc_func(rqstp); if (test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; @@ -1023,7 +1033,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) */ smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1); - nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1); + nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply); out_cached_reply: return 1; diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index ddab9ea267d1283b73f8245aa1adb81232f2a202..3fe2dde1598f9ead48dbbaafcd2c9660fd358f00 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -430,7 +430,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) struct ovl_fs_context *ctx = fc->fs_private; struct ovl_fs_context_layer *l; char *dup = NULL, *iter; - ssize_t nr_lower = 0, nr = 0, nr_data = 0; + ssize_t nr_lower, nr; bool data_layer = false; /* @@ -482,6 +482,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) iter = dup; l = ctx->lower; for (nr = 0; nr < nr_lower; nr++, l++) { + ctx->nr++; memset(l, 0, sizeof(*l)); err = ovl_mount_dir(iter, &l->path); @@ -498,10 +499,10 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) goto out_put; if (data_layer) - nr_data++; + ctx->nr_data++; /* Calling strchr() again would overrun. */ - if ((nr + 1) == nr_lower) + if (ctx->nr == nr_lower) break; err = -EINVAL; @@ -511,7 +512,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) * This is a regular layer so we require that * there are no data layers. */ - if ((ctx->nr_data + nr_data) > 0) { + if (ctx->nr_data > 0) { pr_err("regular lower layers cannot follow data lower layers"); goto out_put; } @@ -524,8 +525,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc) data_layer = true; iter++; } - ctx->nr = nr_lower; - ctx->nr_data += nr_data; kfree(dup); return 0; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 50a201e9cd398aab74a61634f26e51a7f6b47893..c3f020ca13a8c705e312400390b5727e8b4dd312 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -978,7 +978,7 @@ int ovl_set_protattr(struct inode *inode, struct dentry *upper, return 0; } -/** +/* * Caller must hold a reference to inode to prevent it from being freed while * it is marked inuse. */ diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index 6f3285f1dfee58390d157dc8223d269220a730e1..af7849e5974ff36619405a12e667e7543bb3926f 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -64,8 +64,8 @@ struct key_type cifs_spnego_key_type = { * strlen(";sec=ntlmsspi") */ #define MAX_MECH_STR_LEN 13 -/* strlen of "host=" */ -#define HOST_KEY_LEN 5 +/* strlen of ";host=" */ +#define HOST_KEY_LEN 6 /* strlen of ";ip4=" or ";ip6=" */ #define IP_KEY_LEN 5 diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 57c2a7df34578370a046822a9e7844a87b2af8e4..f896f60c924bfa462e8fa764fdf806bcd67b0cfb 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2065,6 +2065,12 @@ void __cifs_put_smb_ses(struct cifs_ses *ses) ses->chans[i].server = NULL; } + /* we now account for primary channel in iface->refcount */ + if (ses->chans[0].iface) { + kref_put(&ses->chans[0].iface->refcount, release_iface); + ses->chans[0].server = NULL; + } + sesInfoFree(ses); cifs_put_tcp_session(server, 0); } diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 0bb2ac9290617941619228caea487d7ba6f41ea6..8b2d7c1ca4284c76cee2154140eaab89fdb8f4df 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -322,28 +322,32 @@ cifs_disable_secondary_channels(struct cifs_ses *ses) iface = ses->chans[i].iface; server = ses->chans[i].server; + /* + * remove these references first, since we need to unlock + * the chan_lock here, since iface_lock is a higher lock + */ + ses->chans[i].iface = NULL; + ses->chans[i].server = NULL; + spin_unlock(&ses->chan_lock); + if (iface) { spin_lock(&ses->iface_lock); kref_put(&iface->refcount, release_iface); - ses->chans[i].iface = NULL; iface->num_channels--; if (iface->weight_fulfilled) iface->weight_fulfilled--; spin_unlock(&ses->iface_lock); } - spin_unlock(&ses->chan_lock); - if (server && !server->terminate) { - server->terminate = true; - cifs_signal_cifsd_for_reconnect(server, false); - } - spin_lock(&ses->chan_lock); - if (server) { - ses->chans[i].server = NULL; + if (!server->terminate) { + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + } cifs_put_tcp_session(server, false); } + spin_lock(&ses->chan_lock); } done: diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 84ea67301303c45c805fafc9bb2f5648655ec728..5a3ca62d2f07f72584392975221cbc9b12276fe8 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -458,6 +458,8 @@ generate_smb3signingkey(struct cifs_ses *ses, ptriplet->encryption.context, ses->smb3encryptionkey, SMB3_ENC_DEC_KEY_SIZE); + if (rc) + return rc; rc = generate_key(ses, ptriplet->decryption.label, ptriplet->decryption.context, ses->smb3decryptionkey, @@ -466,9 +468,6 @@ generate_smb3signingkey(struct cifs_ses *ses, return rc; } - if (rc) - return rc; - #ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS cifs_dbg(VFS, "%s: dumping generated AES session keys\n", __func__); /* diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index ed0bc8cbc703d9c345f121f1226e13f419b511d4..567fb37274d35a796756ab86e7437bdb8fdf9479 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS bool "XFS online metadata check usage data collection" default y depends on XFS_ONLINE_SCRUB - select XFS_DEBUG + select DEBUG_FS help If you say Y here, the kernel will gather usage data about the online metadata check subsystem. This includes the number diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 3069194527dd06791d54cb74f865a663bb0b1624..100ab5931b3132e8b3de4daab94de0dc16f10e07 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist( ASSERT(mp->m_alloc_maxlevels > 0); + /* + * For a btree shorter than the maximum height, the worst case is that + * every level gets split and a new level is added, then while inserting + * another entry to refill the AGFL, every level under the old root gets + * split again. This is: + * + * (full height split reservation) + (AGFL refill split height) + * = (current height + 1) + (current height - 1) + * = (new height) + (new height - 2) + * = 2 * new height - 2 + * + * For a btree of maximum height, the worst case is that every level + * under the root gets split, then while inserting another entry to + * refill the AGFL, every level under the root gets split again. This is + * also: + * + * 2 * (current height - 1) + * = 2 * (new height - 1) + * = 2 * new height - 2 + */ + /* space needed by-bno freespace btree */ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, - mp->m_alloc_maxlevels); + mp->m_alloc_maxlevels) * 2 - 2; /* space needed by-size freespace btree */ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, - mp->m_alloc_maxlevels); + mp->m_alloc_maxlevels) * 2 - 2; /* space needed reverse mapping used space btree */ if (xfs_has_rmapbt(mp)) min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, - mp->m_rmap_maxlevels); + mp->m_rmap_maxlevels) * 2 - 2; return min_free; } diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index bcfb6a4203cdd9cb00bc7a25e0ec112df214e228..f71679ce23b95da4d07536581d15711eafa7fd46 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -245,21 +245,18 @@ xfs_defer_create_intents( return ret; } -/* Abort all the intents that were committed. */ STATIC void -xfs_defer_trans_abort( - struct xfs_trans *tp, - struct list_head *dop_pending) +xfs_defer_pending_abort( + struct xfs_mount *mp, + struct list_head *dop_list) { struct xfs_defer_pending *dfp; const struct xfs_defer_op_type *ops; - trace_xfs_defer_trans_abort(tp, _RET_IP_); - /* Abort intent items that don't have a done item. */ - list_for_each_entry(dfp, dop_pending, dfp_list) { + list_for_each_entry(dfp, dop_list, dfp_list) { ops = defer_op_types[dfp->dfp_type]; - trace_xfs_defer_pending_abort(tp->t_mountp, dfp); + trace_xfs_defer_pending_abort(mp, dfp); if (dfp->dfp_intent && !dfp->dfp_done) { ops->abort_intent(dfp->dfp_intent); dfp->dfp_intent = NULL; @@ -267,6 +264,16 @@ xfs_defer_trans_abort( } } +/* Abort all the intents that were committed. */ +STATIC void +xfs_defer_trans_abort( + struct xfs_trans *tp, + struct list_head *dop_pending) +{ + trace_xfs_defer_trans_abort(tp, _RET_IP_); + xfs_defer_pending_abort(tp->t_mountp, dop_pending); +} + /* * Capture resources that the caller said not to release ("held") when the * transaction commits. Caller is responsible for zero-initializing @dres. @@ -756,12 +763,13 @@ xfs_defer_ops_capture( /* Release all resources that we used to capture deferred ops. */ void -xfs_defer_ops_capture_free( +xfs_defer_ops_capture_abort( struct xfs_mount *mp, struct xfs_defer_capture *dfc) { unsigned short i; + xfs_defer_pending_abort(mp, &dfc->dfc_dfops); xfs_defer_cancel_list(mp, &dfc->dfc_dfops); for (i = 0; i < dfc->dfc_held.dr_bufs; i++) @@ -802,7 +810,7 @@ xfs_defer_ops_capture_and_commit( /* Commit the transaction and add the capture structure to the list. */ error = xfs_trans_commit(tp); if (error) { - xfs_defer_ops_capture_free(mp, dfc); + xfs_defer_ops_capture_abort(mp, dfc); return error; } diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 114a3a4930a3c47080a1e3e38bf06f3b17cb1bda..8788ad5f6a731fbe0fa20faa641eeaa8679a0a98 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp, struct list_head *capture_list); void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, struct xfs_defer_resources *dres); -void xfs_defer_ops_capture_free(struct xfs_mount *mp, +void xfs_defer_ops_capture_abort(struct xfs_mount *mp, struct xfs_defer_capture *d); void xfs_defer_resources_rele(struct xfs_defer_resources *dres); diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 543f3748c2a35334c7c95fda0bbea1d4f5d2bbdf..137a65bda95dc1922d5ecab62ec9c7d358ac1b4a 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -510,6 +510,9 @@ xfs_dinode_verify( if (mode && nextents + naextents > nblocks) return __this_address; + if (nextents + naextents == 0 && nblocks != 0) + return __this_address; + if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) return __this_address; diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 0e5dba2343ea13e2e0451178097f6ad247371724..144198a6b2702c9f825bd9ad22fe10cd1085ea65 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2( struct xfs_log_dinode *ldip; uint isize; int need_free = 0; + xfs_failaddr_t fa; if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { in_f = item->ri_buf[0].i_addr; @@ -369,24 +370,26 @@ xlog_recover_inode_commit_pass2( * superblock flag to determine whether we need to look at di_flushiter * to skip replay when the on disk inode is newer than the log one */ - if (!xfs_has_v3inodes(mp) && - ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { - /* - * Deal with the wrap case, DI_MAX_FLUSH is less - * than smaller numbers - */ - if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && - ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) { - /* do nothing */ - } else { - trace_xfs_log_recover_inode_skip(log, in_f); - error = 0; - goto out_release; + if (!xfs_has_v3inodes(mp)) { + if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { + /* + * Deal with the wrap case, DI_MAX_FLUSH is less + * than smaller numbers + */ + if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && + ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) { + /* do nothing */ + } else { + trace_xfs_log_recover_inode_skip(log, in_f); + error = 0; + goto out_release; + } } + + /* Take the opportunity to reset the flush iteration count */ + ldip->di_flushiter = 0; } - /* Take the opportunity to reset the flush iteration count */ - ldip->di_flushiter = 0; if (unlikely(S_ISREG(ldip->di_mode))) { if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && @@ -528,8 +531,19 @@ xlog_recover_inode_commit_pass2( (dip->di_mode != 0)) error = xfs_recover_inode_owner_change(mp, dip, in_f, buffer_list); - /* re-generate the checksum. */ + /* re-generate the checksum and validate the recovered inode. */ xfs_dinode_calc_crc(log->l_mp, dip); + fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip); + if (fa) { + XFS_CORRUPTION_ERROR( + "Bad dinode after recovery", + XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip)); + xfs_alert(mp, + "Metadata corruption detected at %pS, inode 0x%llx", + fa, in_f->ilf_ino); + error = -EFSCORRUPTED; + goto out_release; + } ASSERT(bp->b_mount == mp); bp->b_flags |= _XBF_LOGRECOVERY; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 51c100c861770f619776a8e3efb921d0b2e55bd8..ee206facf0dc065d4328007f7f32c089989c9c11 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1893,9 +1893,7 @@ xlog_write_iclog( * the buffer manually, the code needs to be kept in sync * with the I/O completion path. */ - xlog_state_done_syncing(iclog); - up(&iclog->ic_sema); - return; + goto sync; } /* @@ -1925,20 +1923,17 @@ xlog_write_iclog( * avoid shutdown re-entering this path and erroring out again. */ if (log->l_targ != log->l_mp->m_ddev_targp && - blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) { - xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); - return; - } + blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) + goto shutdown; } if (iclog->ic_flags & XLOG_ICL_NEED_FUA) iclog->ic_bio.bi_opf |= REQ_FUA; iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); - if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { - xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); - return; - } + if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) + goto shutdown; + if (is_vmalloc_addr(iclog->ic_data)) flush_kernel_vmap_range(iclog->ic_data, count); @@ -1959,6 +1954,12 @@ xlog_write_iclog( } submit_bio(&iclog->ic_bio); + return; +shutdown: + xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); +sync: + xlog_state_done_syncing(iclog); + up(&iclog->ic_sema); } /* diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 13b94d2e605bd9ddb852f76eeeea0bd956a595e0..a1e18b24971a28eedcf79b4d43b16778b42133f8 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2511,7 +2511,7 @@ xlog_abort_defer_ops( list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { list_del_init(&dfc->dfc_list); - xfs_defer_ops_capture_free(mp, dfc); + xfs_defer_ops_capture_abort(mp, dfc); } } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 658edee8381dcdca656135a74a9b5a1b6e13a8cb..e5b62dc2846644c142b04423fbec10e57d81c34d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent( } } del = got; + xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb); /* Grab the corresponding mapping in the data fork. */ nmaps = 1; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4825d3cdb292304bb256b40c54ac254dddb1e2d..6762dac3ef76153fe96bbd05a1050b9a31d1a43d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,7 +56,7 @@ extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; -extern bool bpf_global_ma_set, bpf_global_percpu_ma_set; +extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, @@ -909,10 +909,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static bool bpf_is_ldimm64(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + static inline bool bpf_pseudo_func(const struct bpf_insn *insn) { - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; + return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } struct bpf_prog_ops { diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d305db70674bb539564fda04b226be40ac6acbbd..efc0c0b07efb41b7d4856ab290f8a836319a34d1 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -195,6 +195,7 @@ enum cpuhp_state { CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, + CPUHP_AP_HRTIMERS_DYING, CPUHP_AP_X86_TBOOT_DYING, CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index 45fca09b231943ea38f56f5079ba7e431074f8a9..69501e0ec239f93d7008b2446c2ac23e2c7e11d2 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -50,9 +50,7 @@ " .previous" "\n" \ ) -#ifdef CONFIG_IA64 -#define KSYM_FUNC(name) @fptr(name) -#elif defined(CONFIG_PARISC) && defined(CONFIG_64BIT) +#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT) #define KSYM_FUNC(name) P%name #else #define KSYM_FUNC(name) name diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0ee140176f102f4eed0986f6eebb41c9ec839870..f2044d5a652b5c6eed1652f2a7f1c23050d854a6 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU -int hrtimers_dead_cpu(unsigned int cpu); +int hrtimers_cpu_dying(unsigned int cpu); #else -#define hrtimers_dead_cpu NULL +#define hrtimers_cpu_dying NULL #endif #endif diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 8fa23bdcedbf98f442ec37379bb88211c43ee33f..007fd9c3e4b62cc93a9bf54a1c6502c95feab7b7 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -420,7 +420,7 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) * A function that translates value of following registers to the linkmode: * IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20) * IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60) - * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61) + * IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61) */ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index afb028c54f3397c50dd2163674a24cae61c47997..5547ba68e6e47d851915d1671b0a2a49bfefa4a1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -843,11 +843,11 @@ struct perf_event { }; /* - * ,-----------------------[1:n]----------------------. - * V V - * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event - * ^ ^ | | - * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-' + * ,-----------------------[1:n]------------------------. + * V V + * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event + * | | + * `--[n:1]-> pmu <-[1:n]--' * * * struct perf_event_pmu_context lifetime is refcount based and RCU freed @@ -865,6 +865,9 @@ struct perf_event { * ctx->mutex pinning the configuration. Since we hold a reference on * group_leader (through the filedesc) it can't go away, therefore it's * associated pmu_ctx must exist and cannot change due to ctx->mutex. + * + * perf_event holds a refcount on perf_event_context + * perf_event holds a refcount on perf_event_pmu_context */ struct perf_event_pmu_context { struct pmu *pmu; diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index c36e7a3b45e7e73fc24dcf1a836d568b0dcb85d1..3be2cb564710b5a7be3de43903c5786e15f704ad 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -14,6 +14,7 @@ #ifdef CONFIG_GCC_PLUGIN_STACKLEAK #include <asm/stacktrace.h> +#include <linux/linkage.h> /* * The lowest address on tsk's stack which we can plausibly erase. @@ -76,6 +77,11 @@ static inline void stackleak_task_init(struct task_struct *t) # endif } +asmlinkage void noinstr stackleak_erase(void); +asmlinkage void noinstr stackleak_erase_on_task_stack(void); +asmlinkage void noinstr stackleak_erase_off_task_stack(void); +void __no_caller_saved_registers noinstr stackleak_track_stack(void); + #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ static inline void stackleak_task_init(struct task_struct *t) { } #endif diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index d0f2797420f7044616c7c7ef9faccc956acf5a7c..a09e13a577a99a0f91916ad121329dde11f42b1f 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -5,13 +5,6 @@ #include <linux/pci.h> #include <linux/virtio_pci.h> -struct virtio_pci_modern_common_cfg { - struct virtio_pci_common_cfg cfg; - - __le16 queue_notify_data; /* read-write */ - __le16 queue_reset; /* read-write */ -}; - /** * struct virtio_pci_modern_device - info for modern PCI virtio * @pci_dev: Ptr to the PCI device struct diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3bbd13ab1ecf590bf8d8f040b072653d736b40b8..b157c5cafd14cfe307f3d36ad533d528f142eea6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -178,9 +178,9 @@ static inline __be32 nft_reg_load_be32(const u32 *sreg) return *(__force __be32 *)sreg; } -static inline void nft_reg_store64(u32 *dreg, u64 val) +static inline void nft_reg_store64(u64 *dreg, u64 val) { - put_unaligned(val, (u64 *)dreg); + put_unaligned(val, dreg); } static inline u64 nft_reg_load64(const u32 *sreg) diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index 8a6dbfb233362b2ef0a85e5738d5790ccc0d0a4b..77f87c622a2ef4c2ab7d19277fc95264ed28c9fb 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -58,6 +58,11 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) return to_ct_params(a)->nf_ft; } +static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a) +{ + return to_ct_params(a)->helper; +} + #else static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; } static inline int tcf_ct_action(const struct tc_action *a) { return 0; } @@ -65,6 +70,10 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) { return NULL; } +static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a) +{ + return NULL; +} #endif /* CONFIG_NF_CONNTRACK */ #if IS_ENABLED(CONFIG_NET_ACT_CT) diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index c25fc96145947ed3468aebfbad81d22015f5f239..d24e8e121507bb691e3fa272b63b44f3194f6776 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -219,6 +219,22 @@ */ #define BTRFS_METADATA_ITEM_KEY 169 +/* + * Special inline ref key which stores the id of the subvolume which originally + * created the extent. This subvolume owns the extent permanently from the + * perspective of simple quotas. Needed to know which subvolume to free quota + * usage from when the extent is deleted. + * + * Stored as an inline ref rather to avoid wasting space on a separate item on + * top of the existing extent item. However, unlike the other inline refs, + * there is one one owner ref per extent rather than one per extent. + * + * Because of this, it goes at the front of the list of inline refs, and thus + * must have a lower type value than any other inline ref type (to satisfy the + * disk format rule that inline refs have non-decreasing type). + */ +#define BTRFS_EXTENT_OWNER_REF_KEY 172 + #define BTRFS_TREE_BLOCK_REF_KEY 176 #define BTRFS_EXTENT_DATA_REF_KEY 178 @@ -233,14 +249,6 @@ #define BTRFS_SHARED_DATA_REF_KEY 184 -/* - * Special inline ref key which stores the id of the subvolume which originally - * created the extent. This subvolume owns the extent permanently from the - * perspective of simple quotas. Needed to know which subvolume to free quota - * usage from when the extent is deleted. - */ -#define BTRFS_EXTENT_OWNER_REF_KEY 188 - /* * block groups give us hints into the extent allocation trees. Which * blocks are free etc etc diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index f703afc7ad31ba0791101585fd95b9a10a48f3ce..44f4dd2add188090ff3b03d859fb4d27009d5479 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -166,6 +166,17 @@ struct virtio_pci_common_cfg { __le32 queue_used_hi; /* read-write */ }; +/* + * Warning: do not use sizeof on this: use offsetofend for + * specific fields you need. + */ +struct virtio_pci_modern_common_cfg { + struct virtio_pci_common_cfg cfg; + + __le16 queue_notify_data; /* read-write */ + __le16 queue_reset; /* read-write */ +}; + /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ struct virtio_pci_cfg_cap { struct virtio_pci_cap cap; diff --git a/include/xen/events.h b/include/xen/events.h index 23932b0673dc7459037c16e97642598a16f08877..3b07409f80320e3571ed32fe0877eea631af5655 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -88,7 +88,6 @@ void xen_irq_resume(void); /* Clear an irq's pending state, in preparation for polling on it */ void xen_clear_irq_pending(int irq); -void xen_set_irq_pending(int irq); bool xen_test_irq_pending(int irq); /* Poll waiting for an irq to become pending. In the usual case, the @@ -101,8 +100,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout); /* Determine the IRQ which is bound to an event channel */ unsigned int irq_from_evtchn(evtchn_port_t evtchn); -int irq_from_virq(unsigned int cpu, unsigned int virq); -evtchn_port_t evtchn_from_irq(unsigned irq); +int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq, + evtchn_port_t *evtchn); int xen_set_callback_via(uint64_t via); int xen_evtchn_do_upcall(void); @@ -122,9 +121,6 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, /* De-allocates the above mentioned physical interrupt. */ int xen_destroy_irq(int irq); -/* Return irq from pirq */ -int xen_irq_from_pirq(unsigned pirq); - /* Return the pirq allocated to the irq. */ int xen_pirq_from_irq(unsigned irq); diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index f04a43044d917ceef45fc5a2acfb1910978dd0f3..976e9500f6518cbc121d212af5e80334ef3e2ace 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -145,13 +145,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f) if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { struct io_sq_data *sq = ctx->sq_data; - if (mutex_trylock(&sq->lock)) { - if (sq->thread) { - sq_pid = task_pid_nr(sq->thread); - sq_cpu = task_cpu(sq->thread); - } - mutex_unlock(&sq->lock); - } + sq_pid = sq->task_pid; + sq_cpu = sq->sq_cpu; } seq_printf(m, "SqThread:\t%d\n", sq_pid); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index bd6c2c7959a5bf26c7a394bcd9cbf1b0e94fe595..65b5dbe3c850ed564432c76f17e64739d430f2fe 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -214,6 +214,7 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd) did_sig = get_signal(&ksig); cond_resched(); mutex_lock(&sqd->lock); + sqd->sq_cpu = raw_smp_processor_id(); } return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); } @@ -229,10 +230,15 @@ static int io_sq_thread(void *data) snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); set_task_comm(current, buf); - if (sqd->sq_cpu != -1) + /* reset to our pid after we've set task_comm, for fdinfo */ + sqd->task_pid = current->pid; + + if (sqd->sq_cpu != -1) { set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu)); - else + } else { set_cpus_allowed_ptr(current, cpu_online_mask); + sqd->sq_cpu = raw_smp_processor_id(); + } mutex_lock(&sqd->lock); while (1) { @@ -261,6 +267,7 @@ static int io_sq_thread(void *data) mutex_unlock(&sqd->lock); cond_resched(); mutex_lock(&sqd->lock); + sqd->sq_cpu = raw_smp_processor_id(); } continue; } @@ -294,6 +301,7 @@ static int io_sq_thread(void *data) mutex_unlock(&sqd->lock); schedule(); mutex_lock(&sqd->lock); + sqd->sq_cpu = raw_smp_processor_id(); } list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) atomic_andnot(IORING_SQ_NEED_WAKEUP, diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 91e82e34b51e328eed868e025a5873fd6f557534..7a98cd176a127d2ef1726024a34ad9674b54c28f 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -531,7 +531,7 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark) if (tsk != current) return 0; - if (WARN_ON_ONCE(!current->mm)) + if (!current->mm) return 0; exe_file = get_mm_exe_file(current->mm); if (!exe_file) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 08626b519ce23fce3cefb585e9aa246e9e43025b..cd3afe57ece3cc9a5a52c20243bdafd7fa987f4f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -64,8 +64,8 @@ #define OFF insn->off #define IMM insn->imm -struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; -bool bpf_global_ma_set, bpf_global_percpu_ma_set; +struct bpf_mem_alloc bpf_global_ma; +bool bpf_global_ma_set; /* No hurry in this branch * @@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void) ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); bpf_global_ma_set = !ret; - ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); - bpf_global_percpu_ma_set = !ret; - return !bpf_global_ma_set || !bpf_global_percpu_ma_set; + return ret; } late_initcall(bpf_global_ma_init); #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bd1c42eb540f1f7565d1d0c10457884c10f7a37a..6da370a047feb90c4866310b57cff4c4d9eafe84 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -26,6 +26,7 @@ #include <linux/poison.h> #include <linux/module.h> #include <linux/cpumask.h> +#include <linux/bpf_mem_alloc.h> #include <net/xdp.h> #include "disasm.h" @@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { #undef BPF_LINK_TYPE }; +struct bpf_mem_alloc bpf_global_percpu_ma; +static bool bpf_global_percpu_ma_set; + /* bpf_check() is a static code analyzer that walks eBPF program * instruction by instruction and updates register/stack state. * All paths of conditional branches are analyzed until 'bpf_exit' insn. @@ -336,6 +340,7 @@ struct bpf_kfunc_call_arg_meta { struct btf *btf_vmlinux; static DEFINE_MUTEX(bpf_verifier_lock); +static DEFINE_MUTEX(bpf_percpu_ma_lock); static const struct bpf_line_info * find_linfo(const struct bpf_verifier_env *env, u32 insn_off) @@ -3516,12 +3521,29 @@ static int push_jmp_history(struct bpf_verifier_env *env, /* Backtrack one insn at a time. If idx is not at the top of recorded * history then previous instruction came from straight line execution. + * Return -ENOENT if we exhausted all instructions within given state. + * + * It's legal to have a bit of a looping with the same starting and ending + * insn index within the same state, e.g.: 3->4->5->3, so just because current + * instruction index is the same as state's first_idx doesn't mean we are + * done. If there is still some jump history left, we should keep going. We + * need to take into account that we might have a jump history between given + * state's parent and itself, due to checkpointing. In this case, we'll have + * history entry recording a jump from last instruction of parent state and + * first instruction of given state. */ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, u32 *history) { u32 cnt = *history; + if (i == st->first_insn_idx) { + if (cnt == 0) + return -ENOENT; + if (cnt == 1 && st->jmp_history[0].idx == i) + return -ENOENT; + } + if (cnt && st->jmp_history[cnt - 1].idx == i) { i = st->jmp_history[cnt - 1].prev_idx; (*history)--; @@ -4401,10 +4423,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) * Nothing to be tracked further in the parent state. */ return 0; - if (i == first_idx) - break; subseq_idx = i; i = get_prev_insn_idx(st, i, &history); + if (i == -ENOENT) + break; if (i >= env->prog->len) { /* This can happen if backtracking reached insn 0 * and there are still reg_mask or stack_mask @@ -12074,8 +12096,19 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set) return -ENOMEM; - if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set) - return -ENOMEM; + if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { + if (!bpf_global_percpu_ma_set) { + mutex_lock(&bpf_percpu_ma_lock); + if (!bpf_global_percpu_ma_set) { + err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); + if (!err) + bpf_global_percpu_ma_set = true; + } + mutex_unlock(&bpf_percpu_ma_lock); + if (err) + return err; + } + } if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) { verbose(env, "local type ID argument must be in range [0, U32_MAX]\n"); @@ -15386,8 +15419,7 @@ enum { * w - next instruction * e - edge */ -static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, - bool loop_ok) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { int *insn_stack = env->cfg.insn_stack; int *insn_state = env->cfg.insn_state; @@ -15419,7 +15451,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, insn_stack[env->cfg.cur_stack++] = w; return KEEP_EXPLORING; } else if ((insn_state[w] & 0xF0) == DISCOVERED) { - if (loop_ok && env->bpf_capable) + if (env->bpf_capable) return DONE_EXPLORING; verbose_linfo(env, t, "%d: ", t); verbose_linfo(env, w, "%d: ", w); @@ -15439,24 +15471,20 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, struct bpf_verifier_env *env, bool visit_callee) { - int ret; + int ret, insn_sz; - ret = push_insn(t, t + 1, FALLTHROUGH, env, false); + insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; + ret = push_insn(t, t + insn_sz, FALLTHROUGH, env); if (ret) return ret; - mark_prune_point(env, t + 1); + mark_prune_point(env, t + insn_sz); /* when we exit from subprog, we need to record non-linear history */ - mark_jmp_point(env, t + 1); + mark_jmp_point(env, t + insn_sz); if (visit_callee) { mark_prune_point(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env, - /* It's ok to allow recursion from CFG point of - * view. __check_func_call() will do the actual - * check. - */ - bpf_pseudo_func(insns + t)); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); } return ret; } @@ -15469,15 +15497,17 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, static int visit_insn(int t, struct bpf_verifier_env *env) { struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t]; - int ret, off; + int ret, off, insn_sz; if (bpf_pseudo_func(insn)) return visit_func_call_insn(t, insns, env, true); /* All non-branch instructions have a single fall-through edge. */ if (BPF_CLASS(insn->code) != BPF_JMP && - BPF_CLASS(insn->code) != BPF_JMP32) - return push_insn(t, t + 1, FALLTHROUGH, env, false); + BPF_CLASS(insn->code) != BPF_JMP32) { + insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; + return push_insn(t, t + insn_sz, FALLTHROUGH, env); + } switch (BPF_OP(insn->code)) { case BPF_EXIT: @@ -15523,8 +15553,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env) off = insn->imm; /* unconditional jump with single edge */ - ret = push_insn(t, t + off + 1, FALLTHROUGH, env, - true); + ret = push_insn(t, t + off + 1, FALLTHROUGH, env); if (ret) return ret; @@ -15537,11 +15566,11 @@ static int visit_insn(int t, struct bpf_verifier_env *env) /* conditional jump with two edges */ mark_prune_point(env, t); - ret = push_insn(t, t + 1, FALLTHROUGH, env, true); + ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret) return ret; - return push_insn(t, t + insn->off + 1, BRANCH, env, true); + return push_insn(t, t + insn->off + 1, BRANCH, env); } } @@ -15607,11 +15636,21 @@ static int check_cfg(struct bpf_verifier_env *env) } for (i = 0; i < insn_cnt; i++) { + struct bpf_insn *insn = &env->prog->insnsi[i]; + if (insn_state[i] != EXPLORED) { verbose(env, "unreachable insn %d\n", i); ret = -EINVAL; goto err_free; } + if (bpf_is_ldimm64(insn)) { + if (insn_state[i + 1] != 0) { + verbose(env, "jump into the middle of ldimm64 insn %d\n", i); + ret = -EINVAL; + goto err_free; + } + i++; /* skip second half of ldimm64 */ + } } ret = 0; /* cfg looks good */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1d5b9de3b1b9d01791b1222bf2fcbb4e46c852ee..4b9ff41ca603a3a9952d079b3180b43474b9f892 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3885,14 +3885,6 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); } -static int cgroup_pressure_open(struct kernfs_open_file *of) -{ - if (of->file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE)) - return -EPERM; - - return 0; -} - static void cgroup_pressure_release(struct kernfs_open_file *of) { struct cgroup_file_ctx *ctx = of->priv; @@ -5299,7 +5291,6 @@ static struct cftype cgroup_psi_files[] = { { .name = "io.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_IO]), - .open = cgroup_pressure_open, .seq_show = cgroup_io_pressure_show, .write = cgroup_io_pressure_write, .poll = cgroup_pressure_poll, @@ -5308,7 +5299,6 @@ static struct cftype cgroup_psi_files[] = { { .name = "memory.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]), - .open = cgroup_pressure_open, .seq_show = cgroup_memory_pressure_show, .write = cgroup_memory_pressure_write, .poll = cgroup_pressure_poll, @@ -5317,7 +5307,6 @@ static struct cftype cgroup_psi_files[] = { { .name = "cpu.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]), - .open = cgroup_pressure_open, .seq_show = cgroup_cpu_pressure_show, .write = cgroup_cpu_pressure_write, .poll = cgroup_pressure_poll, @@ -5327,7 +5316,6 @@ static struct cftype cgroup_psi_files[] = { { .name = "irq.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]), - .open = cgroup_pressure_open, .seq_show = cgroup_irq_pressure_show, .write = cgroup_irq_pressure_write, .poll = cgroup_pressure_poll, diff --git a/kernel/cpu.c b/kernel/cpu.c index 9e4c6780adde8f71c9ad14ba3c312e865ac3485d..a86972a91991580b2edcf205b4b6dfd96deeec69 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2113,7 +2113,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { [CPUHP_HRTIMERS_PREPARE] = { .name = "hrtimers:prepare", .startup.single = hrtimers_prepare_cpu, - .teardown.single = hrtimers_dead_cpu, + .teardown.single = NULL, }, [CPUHP_SMPCFD_PREPARE] = { .name = "smpcfd:prepare", @@ -2205,6 +2205,12 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = NULL, .teardown.single = smpcfd_dying_cpu, }, + [CPUHP_AP_HRTIMERS_DYING] = { + .name = "hrtimers:dying", + .startup.single = NULL, + .teardown.single = hrtimers_cpu_dying, + }, + /* Entry state on starting. Interrupts enabled from here on. Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { diff --git a/kernel/events/core.c b/kernel/events/core.c index 683dc086ef10a53123c8aa175b436c556a51a410..b704d83a28b29bace1741616eeff55a1cf03f413 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4828,6 +4828,11 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, void *task_ctx_data = NULL; if (!ctx->task) { + /* + * perf_pmu_migrate_context() / __perf_pmu_install_event() + * relies on the fact that find_get_pmu_context() cannot fail + * for CPU contexts. + */ struct perf_cpu_pmu_context *cpc; cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu); @@ -12889,6 +12894,9 @@ static void __perf_pmu_install_event(struct pmu *pmu, int cpu, struct perf_event *event) { struct perf_event_pmu_context *epc; + struct perf_event_context *old_ctx = event->ctx; + + get_ctx(ctx); /* normally find_get_context() */ event->cpu = cpu; epc = find_get_pmu_context(pmu, ctx, event); @@ -12897,6 +12905,11 @@ static void __perf_pmu_install_event(struct pmu *pmu, if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; perf_install_in_context(ctx, event, cpu); + + /* + * Now that event->ctx is updated and visible, put the old ctx. + */ + put_ctx(old_ctx); } static void __perf_pmu_install(struct perf_event_context *ctx, @@ -12935,6 +12948,10 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) struct perf_event_context *src_ctx, *dst_ctx; LIST_HEAD(events); + /* + * Since per-cpu context is persistent, no need to grab an extra + * reference. + */ src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx; dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx; diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 52695c59d04114350d29f1bf0403c19b873663d6..dad981a865b841c954deed1934674f514832ff7e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -700,7 +700,8 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, owner = uval & FUTEX_TID_MASK; if (pending_op && !pi && !owner) { - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, + FUTEX_BITSET_MATCH_ANY); return 0; } @@ -752,8 +753,10 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, * Wake robust non-PI futexes here. The wakeup of * PI futexes happens in exit_pi_state(): */ - if (!pi && (uval & FUTEX_WAITERS)) - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + if (!pi && (uval & FUTEX_WAITERS)) { + futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, + FUTEX_BITSET_MATCH_ANY); + } return 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2048138ce54b574a3ba56b9f6bf7b1cefac1fd32..d7a3c63a2171a8569abc8fe7c3997dc1d22e001e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3666,41 +3666,140 @@ static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } #endif +static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, + unsigned long weight) +{ + unsigned long old_weight = se->load.weight; + u64 avruntime = avg_vruntime(cfs_rq); + s64 vlag, vslice; + + /* + * VRUNTIME + * ======== + * + * COROLLARY #1: The virtual runtime of the entity needs to be + * adjusted if re-weight at !0-lag point. + * + * Proof: For contradiction assume this is not true, so we can + * re-weight without changing vruntime at !0-lag point. + * + * Weight VRuntime Avg-VRuntime + * before w v V + * after w' v' V' + * + * Since lag needs to be preserved through re-weight: + * + * lag = (V - v)*w = (V'- v')*w', where v = v' + * ==> V' = (V - v)*w/w' + v (1) + * + * Let W be the total weight of the entities before reweight, + * since V' is the new weighted average of entities: + * + * V' = (WV + w'v - wv) / (W + w' - w) (2) + * + * by using (1) & (2) we obtain: + * + * (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v + * ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v + * ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v + * ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3) + * + * Since we are doing at !0-lag point which means V != v, we + * can simplify (3): + * + * ==> W / (W + w' - w) = w / w' + * ==> Ww' = Ww + ww' - ww + * ==> W * (w' - w) = w * (w' - w) + * ==> W = w (re-weight indicates w' != w) + * + * So the cfs_rq contains only one entity, hence vruntime of + * the entity @v should always equal to the cfs_rq's weighted + * average vruntime @V, which means we will always re-weight + * at 0-lag point, thus breach assumption. Proof completed. + * + * + * COROLLARY #2: Re-weight does NOT affect weighted average + * vruntime of all the entities. + * + * Proof: According to corollary #1, Eq. (1) should be: + * + * (V - v)*w = (V' - v')*w' + * ==> v' = V' - (V - v)*w/w' (4) + * + * According to the weighted average formula, we have: + * + * V' = (WV - wv + w'v') / (W - w + w') + * = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w') + * = (WV - wv + w'V' - Vw + wv) / (W - w + w') + * = (WV + w'V' - Vw) / (W - w + w') + * + * ==> V'*(W - w + w') = WV + w'V' - Vw + * ==> V' * (W - w) = (W - w) * V (5) + * + * If the entity is the only one in the cfs_rq, then reweight + * always occurs at 0-lag point, so V won't change. Or else + * there are other entities, hence W != w, then Eq. (5) turns + * into V' = V. So V won't change in either case, proof done. + * + * + * So according to corollary #1 & #2, the effect of re-weight + * on vruntime should be: + * + * v' = V' - (V - v) * w / w' (4) + * = V - (V - v) * w / w' + * = V - vl * w / w' + * = V - vl' + */ + if (avruntime != se->vruntime) { + vlag = (s64)(avruntime - se->vruntime); + vlag = div_s64(vlag * old_weight, weight); + se->vruntime = avruntime - vlag; + } + + /* + * DEADLINE + * ======== + * + * When the weight changes, the virtual time slope changes and + * we should adjust the relative virtual deadline accordingly. + * + * d' = v' + (d - v)*w/w' + * = V' - (V - v)*w/w' + (d - v)*w/w' + * = V - (V - v)*w/w' + (d - v)*w/w' + * = V + (d - V)*w/w' + */ + vslice = (s64)(se->deadline - avruntime); + vslice = div_s64(vslice * old_weight, weight); + se->deadline = avruntime + vslice; +} + static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { - unsigned long old_weight = se->load.weight; + bool curr = cfs_rq->curr == se; if (se->on_rq) { /* commit outstanding execution time */ - if (cfs_rq->curr == se) + if (curr) update_curr(cfs_rq); else - avg_vruntime_sub(cfs_rq, se); + __dequeue_entity(cfs_rq, se); update_load_sub(&cfs_rq->load, se->load.weight); } dequeue_load_avg(cfs_rq, se); - update_load_set(&se->load, weight); - if (!se->on_rq) { /* * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), * we need to scale se->vlag when w_i changes. */ - se->vlag = div_s64(se->vlag * old_weight, weight); + se->vlag = div_s64(se->vlag * se->load.weight, weight); } else { - s64 deadline = se->deadline - se->vruntime; - /* - * When the weight changes, the virtual time slope changes and - * we should adjust the relative virtual deadline accordingly. - */ - deadline = div_s64(deadline * old_weight, weight); - se->deadline = se->vruntime + deadline; - if (se != cfs_rq->curr) - min_deadline_cb_propagate(&se->run_node, NULL); + reweight_eevdf(cfs_rq, se, weight); } + update_load_set(&se->load, weight); + #ifdef CONFIG_SMP do { u32 divider = get_pelt_divider(&se->avg); @@ -3712,8 +3811,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, enqueue_load_avg(cfs_rq, se); if (se->on_rq) { update_load_add(&cfs_rq->load, se->load.weight); - if (cfs_rq->curr != se) - avg_vruntime_add(cfs_rq, se); + if (!curr) { + /* + * The entity's vruntime has been adjusted, so let's check + * whether the rq-wide min_vruntime needs updated too. Since + * the calculations above require stable min_vruntime rather + * than up-to-date one, we do the update at the end of the + * reweight process. + */ + __enqueue_entity(cfs_rq, se); + update_min_vruntime(cfs_rq); + } } } @@ -3857,14 +3965,11 @@ static void update_cfs_group(struct sched_entity *se) #ifndef CONFIG_SMP shares = READ_ONCE(gcfs_rq->tg->shares); - - if (likely(se->load.weight == shares)) - return; #else - shares = calc_group_shares(gcfs_rq); + shares = calc_group_shares(gcfs_rq); #endif - - reweight_entity(cfs_rq_of(se), se, shares); + if (unlikely(se->load.weight != shares)) + reweight_entity(cfs_rq_of(se), se, shares); } #else /* CONFIG_FAIR_GROUP_SCHED */ @@ -11079,12 +11184,16 @@ static int should_we_balance(struct lb_env *env) continue; } - /* Are we the first idle CPU? */ + /* + * Are we the first idle core in a non-SMT domain or higher, + * or the first idle CPU in a SMT domain? + */ return cpu == env->dst_cpu; } - if (idle_smt == env->dst_cpu) - return true; + /* Are we the first idle CPU with busy siblings? */ + if (idle_smt != -1) + return idle_smt == env->dst_cpu; /* Are we the first CPU of this group ? */ return group_balance_cpu(sg) == env->dst_cpu; diff --git a/kernel/sys.c b/kernel/sys.c index 420d9cb9cc8e203f50014bb2ec564f6598d9869c..e219fcfa112d863eeef58381d04fd4bab16a1e32 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2394,6 +2394,10 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) return -EINVAL; + /* PARISC cannot allow mdwe as it needs writable stacks */ + if (IS_ENABLED(CONFIG_PARISC)) + return -EINVAL; + current_bits = get_current_mdwe(); if (current_bits && current_bits != bits) return -EPERM; /* Cannot unset the flags */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 238262e4aba7e2887f2766cc17b61aa0eb546308..760793998cdd703a387c64a792a7b7f7dab552d5 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2219,29 +2219,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, } } -int hrtimers_dead_cpu(unsigned int scpu) +int hrtimers_cpu_dying(unsigned int dying_cpu) { struct hrtimer_cpu_base *old_base, *new_base; - int i; + int i, ncpu = cpumask_first(cpu_active_mask); - BUG_ON(cpu_online(scpu)); - tick_cancel_sched_timer(scpu); + tick_cancel_sched_timer(dying_cpu); + + old_base = this_cpu_ptr(&hrtimer_bases); + new_base = &per_cpu(hrtimer_bases, ncpu); - /* - * this BH disable ensures that raise_softirq_irqoff() does - * not wakeup ksoftirqd (and acquire the pi-lock) while - * holding the cpu_base lock - */ - local_bh_disable(); - local_irq_disable(); - old_base = &per_cpu(hrtimer_bases, scpu); - new_base = this_cpu_ptr(&hrtimer_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - raw_spin_lock(&new_base->lock); - raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock(&old_base->lock); + raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], @@ -2252,15 +2245,13 @@ int hrtimers_dead_cpu(unsigned int scpu) * The migration might have changed the first expiring softirq * timer on this CPU. Update it. */ - hrtimer_update_softirq_timer(new_base, false); + __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); + /* Tell the other CPU to retrigger the next event */ + smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); - raw_spin_unlock(&old_base->lock); raw_spin_unlock(&new_base->lock); + raw_spin_unlock(&old_base->lock); - /* Check, if we got expired work to do */ - __hrtimer_peek_ahead_timers(); - local_irq_enable(); - local_bh_enable(); return 0; } diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c index a0d06095be83de601e292f3154235bfcde946dae..8dcb8ca39767c8dfbf63eaf5e82ce46c4b848381 100644 --- a/lib/zstd/common/fse_decompress.c +++ b/lib/zstd/common/fse_decompress.c @@ -312,7 +312,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size typedef struct { short ncount[FSE_MAX_SYMBOL_VALUE + 1]; - FSE_DTable dtable[1]; /* Dynamically sized */ + FSE_DTable dtable[]; /* Dynamically sized */ } FSE_DecompressWksp; diff --git a/mm/damon/core.c b/mm/damon/core.c index 630077d95dc60721015ea4b195c85e1c73f484ce..6262d55904e744a4a41431127266971eb5ee3d8b 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -924,7 +924,7 @@ static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t, matched = true; break; default: - break; + return false; } return matched == filter->matching; diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 45bd0fd4a8b1616d6336d27175fc33a372db41e7..be667236b8e6e30713a205a4987d50c3b48ab463 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -162,6 +162,9 @@ damon_sysfs_scheme_regions_alloc(void) struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions), GFP_KERNEL); + if (!regions) + return NULL; + regions->kobj = (struct kobject){}; INIT_LIST_HEAD(®ions->regions_list); regions->nr_regions = 0; @@ -1823,6 +1826,8 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx, return 0; region = damon_sysfs_scheme_region_alloc(r); + if (!region) + return 0; list_add_tail(®ion->list, &sysfs_regions->regions_list); sysfs_regions->nr_regions++; if (kobject_init_and_add(®ion->kobj, diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index e27846708b5a248b9632d662004a0d49a56844b7..7472404456aa812e302cbee3b7b9d0fc490699b7 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1172,7 +1172,7 @@ static int damon_sysfs_update_target(struct damon_target *target, struct damon_ctx *ctx, struct damon_sysfs_target *sys_target) { - int err; + int err = 0; if (damon_target_has_pid(ctx)) { err = damon_sysfs_update_target_pid(target, sys_target->pid); @@ -1203,8 +1203,10 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx, damon_for_each_target_safe(t, next, ctx) { if (i < sysfs_targets->nr) { - damon_sysfs_update_target(t, ctx, + err = damon_sysfs_update_target(t, ctx, sysfs_targets->targets_arr[i]); + if (err) + return err; } else { if (damon_target_has_pid(ctx)) put_pid(t->pid); diff --git a/mm/filemap.c b/mm/filemap.c index 9710f43a89acd3ade2c8289b6e9352fcf11cf9e5..32eedf3afd45883a7c34920c4a097906b743621e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3443,7 +3443,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, * handled in the specific fault path, and it'll prohibit the * fault-around logic. */ - if (!pte_none(vmf->pte[count])) + if (!pte_none(ptep_get(&vmf->pte[count]))) goto skip; count++; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f31f02472396e7671132e965a93664a62238fca5..4f542444a91f2a30df8381693fa1f28686a81534 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2769,13 +2769,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) int nr = folio_nr_pages(folio); xas_split(&xas, folio, folio_order(folio)); - if (folio_test_swapbacked(folio)) { - __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, - -nr); - } else { - __lruvec_stat_mod_folio(folio, NR_FILE_THPS, - -nr); - filemap_nr_thps_dec(mapping); + if (folio_test_pmd_mappable(folio)) { + if (folio_test_swapbacked(folio)) { + __lruvec_stat_mod_folio(folio, + NR_SHMEM_THPS, -nr); + } else { + __lruvec_stat_mod_folio(folio, + NR_FILE_THPS, -nr); + filemap_nr_thps_dec(mapping); + } } } diff --git a/mm/ksm.c b/mm/ksm.c index 7efcc68ccc6eaeaa910474a745f835c7f7b04d66..6a831009b4cbf9e191707571aacf0a6d4c8db7e1 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -468,7 +468,7 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex page = pfn_swap_entry_to_page(entry); } /* return 1 if the page is an normal ksm page or KSM-placed zero page */ - ret = (page && PageKsm(page)) || is_ksm_zero_pte(*pte); + ret = (page && PageKsm(page)) || is_ksm_zero_pte(ptent); pte_unmap_unlock(pte, ptl); return ret; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 774bd6e21e2788ac1ee094c84176b488543fd6c3..1c1061df9cd17cb664d5d6b9faf1ac79db2cef6a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2936,7 +2936,8 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg) * Moreover, it should not come from DMA buffer and is not readily * reclaimable. So those GFP bits should be masked off. */ -#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT) +#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ + __GFP_ACCOUNT | __GFP_NOFAIL) /* * mod_objcg_mlstate() may be called with irq enabled, so diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 96d9eae5c7cc8e21e2fd7d6c8dacb172ab6e7e90..0b6ca553bebec523580e0fb1d43ede8646134342 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -312,7 +312,7 @@ static int mfill_atomic_pte_poison(pmd_t *dst_pmd, ret = -EEXIST; /* Refuse to overwrite any PTE, even a PTE marker (e.g. UFFD WP). */ - if (!pte_none(*dst_pte)) + if (!pte_none(ptep_get(dst_pte))) goto out_unlock; set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); diff --git a/mm/util.c b/mm/util.c index aa01f6ea5a75b7add33836dbe1d66c98dbd2c2e6..744b4d7e3fae2d2f60a34599ea4a1e43d7412315 100644 --- a/mm/util.c +++ b/mm/util.c @@ -414,6 +414,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { +#ifdef CONFIG_STACK_GROWSUP + /* + * For an upwards growing stack the calculation is much simpler. + * Memory for the maximum stack size is reserved at the top of the + * task. mmap_base starts directly below the stack and grows + * downwards. + */ + return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd); +#else unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_guard_gap; @@ -431,6 +440,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) gap = MAX_GAP; return PAGE_ALIGN(STACK_TOP - gap - rnd); +#endif } void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index b5c406a6e7654f95fd323936219f0abd3dc66d65..abb090f94ed2609eeb9cd54b4e5faed1c3cb7bfe 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -37,7 +37,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, ktime_t tstamp = skb->tstamp; struct ip_frag_state state; struct iphdr *iph; - int err; + int err = 0; /* for offloaded checksums cleanup checksum before fragmentation */ if (skb->ip_summed == CHECKSUM_PARTIAL && diff --git a/net/core/dev.c b/net/core/dev.c index 0d548431f3fadfd37af7c58664d3c6a706e8ff80..af53f6d838ce9e5ec73570d31ac39b9f36b1c5c5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1119,7 +1119,9 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res) if (i == max_netdevices) return -ENFILE; - snprintf(res, IFNAMSIZ, name, i); + /* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */ + strscpy(buf, name, IFNAMSIZ); + snprintf(res, IFNAMSIZ, buf, i); return i; } diff --git a/net/core/gso_test.c b/net/core/gso_test.c index ceb684be4cbf862e25cabb4c46a6f2dcdb679994..4c2e77bd12f4b17f57f115ce4f9b99fb1da0a875 100644 --- a/net/core/gso_test.c +++ b/net/core/gso_test.c @@ -180,18 +180,17 @@ static void gso_test_func(struct kunit *test) } if (tcase->frag_skbs) { - unsigned int total_size = 0, total_true_size = 0, alloc_size = 0; + unsigned int total_size = 0, total_true_size = 0; struct sk_buff *frag_skb, *prev = NULL; - page = alloc_page(GFP_KERNEL); - KUNIT_ASSERT_NOT_NULL(test, page); - page_ref_add(page, tcase->nr_frag_skbs - 1); - for (i = 0; i < tcase->nr_frag_skbs; i++) { unsigned int frag_size; + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + frag_size = tcase->frag_skbs[i]; - frag_skb = build_skb(page_address(page) + alloc_size, + frag_skb = build_skb(page_address(page), frag_size + shinfo_size); KUNIT_ASSERT_NOT_NULL(test, frag_skb); __skb_put(frag_skb, frag_size); @@ -204,11 +203,8 @@ static void gso_test_func(struct kunit *test) total_size += frag_size; total_true_size += frag_skb->truesize; - alloc_size += frag_size + shinfo_size; } - KUNIT_ASSERT_LE(test, alloc_size, PAGE_SIZE); - skb->len += total_size; skb->data_len += total_size; skb->truesize += total_true_size; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 598c1b114d2c2256fa06735b823b38cef70a8a34..a532f749e47781cc951f2003f621cec4387a2384 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -751,12 +751,12 @@ int __inet_hash(struct sock *sk, struct sock *osk) if (err) goto unlock; } + sock_set_flag(sk, SOCK_RCU_FREE); if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head); else __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head); - sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: spin_unlock(&ilb2->lock); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 1529ec35881552a53b18afb31ded27eece49a8d6..bf4d96f6f99a6e87d1b30c84a316d12c925bbcb2 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1515,8 +1515,9 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, rm_list, list) { - remove_anno_list_by_saddr(msk, &entry->addr); - if (alist.nr < MPTCP_RM_IDS_MAX) + if ((remove_anno_list_by_saddr(msk, &entry->addr) || + lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) && + alist.nr < MPTCP_RM_IDS_MAX) alist.ids[alist.nr++] = entry->addr.id; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a0b8356cd8c58f2d2ea46e385804b4525cf7950a..bc81ea53a04992ef4793da9fd688a7c72b3227d5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1230,6 +1230,8 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, mptcp_do_fallback(ssk); } +#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1)) + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@ -1256,6 +1258,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, return -EAGAIN; /* compute send limit */ + if (unlikely(ssk->sk_gso_max_size > MPTCP_MAX_GSO_SIZE)) + ssk->sk_gso_max_size = MPTCP_MAX_GSO_SIZE; info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); copy = info->size_goal; @@ -3398,10 +3402,11 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); if (unlikely(msk->cb_flags)) { - /* be sure to set the current sk state before tacking actions - * depending on sk_state, that is processing MPTCP_ERROR_REPORT + /* be sure to set the current sk state before taking actions + * depending on sk_state (MPTCP_ERROR_REPORT) + * On sk release avoid actions depending on the first subflow */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags)) + if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first) __mptcp_set_connected(sk); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 77f5e8932abf64370e00ffe473a0cd9431494898..35368073370048f400ea82416cb0e0a0c6595a58 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -738,8 +738,11 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, val = READ_ONCE(inet_sk(sk)->tos); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + slow = lock_sock_fast(ssk); __ip_sock_set_tos(ssk, val); + unlock_sock_fast(ssk, slow); } release_sock(sk); diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index f8854bff286cbd7229e0c543689f892f40e80cab..62fb1031763d14f82dcfee50191a21cb38ad8cf9 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -89,11 +89,6 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, if ((had_link == has_link) || chained) return 0; - if (had_link) - netif_carrier_off(ndp->ndev.dev); - else - netif_carrier_on(ndp->ndev.dev); - if (!ndp->multi_package && !nc->package->multi_channel) { if (had_link) { ndp->flags |= NCSI_DEV_RESHUFFLE; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 35d2f9c9ada0252e01af3bee1faa1f63a0c94498..4c133e06be1de2f8972b50ac87e6b0b7bfc9ac6d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); ip_set_dereference((inst)->ip_set_list)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] +#define ip_set_dereference_nfnl(p) \ + rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set) static struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { - struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - rcu_read_lock(); - /* ip_set_list itself needs to be protected */ - set = rcu_dereference(inst->ip_set_list)[index]; - rcu_read_unlock(); - - return set; + /* ip_set_list and the set pointer need to be protected */ + return ip_set_dereference_nfnl(inst->ip_set_list)[index]; } static inline void @@ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); + /* Make sure all readers of the old set pointers are completed. */ + synchronize_rcu(); + return 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a761ee6796f6fa448ba6ce8dbc50b34aaebd8e6b..c0a42989b982266aa7378f06c0a46b5668335d3a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7263,10 +7263,11 @@ static int nf_tables_delsetelem(struct sk_buff *skb, if (err < 0) { NL_SET_BAD_ATTR(extack, attr); - break; + return err; } } - return err; + + return 0; } /* @@ -9679,16 +9680,14 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) call_rcu(&trans->rcu, nft_trans_gc_trans_free); } -static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, - unsigned int gc_seq, - bool sync) +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq) { - struct nft_set_elem_catchall *catchall, *next; + struct nft_set_elem_catchall *catchall; const struct nft_set *set = gc->set; - struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; - list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { + list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_expired(ext)) @@ -9698,35 +9697,42 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, nft_set_elem_dead(ext); dead_elem: - if (sync) - gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); - else - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); - + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); if (!gc) return NULL; - elem_priv = catchall->elem; - if (sync) { - nft_setelem_data_deactivate(gc->net, gc->set, elem_priv); - nft_setelem_catchall_destroy(catchall); - } - - nft_trans_gc_elem_add(gc, elem_priv); + nft_trans_gc_elem_add(gc, catchall->elem); } return gc; } -struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, - unsigned int gc_seq) -{ - return nft_trans_gc_catchall(gc, gc_seq, false); -} - struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) { - return nft_trans_gc_catchall(gc, 0, true); + struct nft_set_elem_catchall *catchall, *next; + const struct nft_set *set = gc->set; + struct nft_elem_priv *elem_priv; + struct nft_set_ext *ext; + + WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)); + + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + + if (!nft_set_elem_expired(ext)) + continue; + + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); + if (!gc) + return NULL; + + elem_priv = catchall->elem; + nft_setelem_data_deactivate(gc->net, gc->set, elem_priv); + nft_setelem_catchall_destroy(catchall); + nft_trans_gc_elem_add(gc, elem_priv); + } + + return gc; } static void nf_tables_module_autoload_cleanup(struct net *net) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e596d1a842f7024a5b0237985d9d69a999528b95..f6e791a6810151823fa021849f4809af715e5a67 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -38,13 +38,14 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->size) { case 8: { + u64 *dst64 = (void *)dst; u64 src64; switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { src64 = nft_reg_load64(&src[i]); - nft_reg_store64(&dst[i], + nft_reg_store64(&dst64[i], be64_to_cpu((__force __be64)src64)); } break; @@ -52,7 +53,7 @@ void nft_byteorder_eval(const struct nft_expr *expr, for (i = 0; i < priv->len / 8; i++) { src64 = (__force __u64) cpu_to_be64(nft_reg_load64(&src[i])); - nft_reg_store64(&dst[i], src64); + nft_reg_store64(&dst64[i], src64); } break; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index f7da7c43333b5ae4d320813179ea6b54742f91da..ba0d3683a45d32aae8e7240b035c7facc604aea0 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -63,7 +63,7 @@ nft_meta_get_eval_time(enum nft_meta_keys key, { switch (key) { case NFT_META_TIME_NS: - nft_reg_store64(dest, ktime_get_real_ns()); + nft_reg_store64((u64 *)dest, ktime_get_real_ns()); break; case NFT_META_TIME_DAY: nft_reg_store8(dest, nft_meta_weekday()); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 6f1186abd47b4366e10f09f9dbed80101bbdf69c..baa3fea4fe65c8f938e665a7fb6b0e4fc0f8f9ad 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -624,14 +624,12 @@ static void nft_rbtree_gc(struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe, *rbe_end = NULL; - struct nftables_pernet *nft_net; struct rb_node *node, *next; struct nft_trans_gc *gc; struct net *net; set = nft_set_container_of(priv); net = read_pnet(&set->net); - nft_net = nft_pernet(net); gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); if (!gc) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 0db0ecf1d11038a49e487e36b2eb33a028ae8727..b3f4a503ee2ba4fd9620567208e2e77ed80b41b1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1549,6 +1549,9 @@ static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data, if (bind) { struct flow_action_entry *entry = entry_data; + if (tcf_ct_helper(act)) + return -EOPNOTSUPP; + entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 5bc076f2fa74a295d5236ffd359f8d1b11f6ea53..c763008a8adbaa0538615431984c4f95926bcc3f 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -102,6 +102,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) return -EMSGSIZE; skb_put(skb, TLV_SPACE(len)); + memset(tlv, 0, TLV_SPACE(len)); tlv->tlv_type = htons(type); tlv->tlv_len = htons(TLV_LENGTH(len)); if (len && data) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 45506a95b25f8acdb99699c3c9256f50d0e7e5d0..a357dc5f24046d98674da9935eccbb9e18ea4616 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2581,15 +2581,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) if (!(state->flags & MSG_PEEK)) WRITE_ONCE(u->oob_skb, NULL); - + else + skb_get(oob_skb); unix_state_unlock(sk); chunk = state->recv_actor(oob_skb, 0, chunk, state); - if (!(state->flags & MSG_PEEK)) { + if (!(state->flags & MSG_PEEK)) UNIXCB(oob_skb).consumed += 1; - kfree_skb(oob_skb); - } + + consume_skb(oob_skb); mutex_unlock(&u->iolock); diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 68d0134bdbf9d1488a1e4881b6de77016d6e8e75..1a965fe68e011196d476a5d422bab347077112cd 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -487,14 +487,14 @@ UIMAGE_OPTS-y ?= UIMAGE_TYPE ?= kernel UIMAGE_LOADADDR ?= arch_must_set_this UIMAGE_ENTRYADDR ?= $(UIMAGE_LOADADDR) -UIMAGE_NAME ?= 'Linux-$(KERNELRELEASE)' +UIMAGE_NAME ?= Linux-$(KERNELRELEASE) quiet_cmd_uimage = UIMAGE $@ cmd_uimage = $(BASH) $(MKIMAGE) -A $(UIMAGE_ARCH) -O linux \ -C $(UIMAGE_COMPRESSION) $(UIMAGE_OPTS-y) \ -T $(UIMAGE_TYPE) \ -a $(UIMAGE_LOADADDR) -e $(UIMAGE_ENTRYADDR) \ - -n $(UIMAGE_NAME) -d $< $@ + -n '$(UIMAGE_NAME)' -d $< $@ # XZ # --------------------------------------------------------------------------- diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 39e86be60dd2d7a1fc8d8689f940e437a03dfb1b..ff0b192be91ff6efd3ac17d1c0850bfac92f31bc 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -17,7 +17,7 @@ * if (argc <= 1) * printf("%s: no command arguments :(\n", *argv); * else - * printf("%s: %d command arguments!\n", *argv, args - 1); + * printf("%s: %d command arguments!\n", *argv, argc - 1); * } * * after: @@ -47,7 +47,7 @@ * // perturb_local_entropy() * } else { * local_entropy ^= 3896280633962944730; - * printf("%s: %d command arguments!\n", *argv, args - 1); + * printf("%s: %d command arguments!\n", *argv, argc - 1); * } * * // latent_entropy_execute() 4. diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 366395cab490dec0cd80da7623727122438e78ab..910bd21d08f48db58d36db2a1222c2cca33782f4 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -278,8 +278,6 @@ static bool is_flexible_array(const_tree field) { const_tree fieldtype; const_tree typesize; - const_tree elemtype; - const_tree elemsize; fieldtype = TREE_TYPE(field); typesize = TYPE_SIZE(fieldtype); @@ -287,20 +285,12 @@ static bool is_flexible_array(const_tree field) if (TREE_CODE(fieldtype) != ARRAY_TYPE) return false; - elemtype = TREE_TYPE(fieldtype); - elemsize = TYPE_SIZE(elemtype); - /* size of type is represented in bits */ if (typesize == NULL_TREE && TYPE_DOMAIN(fieldtype) != NULL_TREE && TYPE_MAX_VALUE(TYPE_DOMAIN(fieldtype)) == NULL_TREE) return true; - if (typesize != NULL_TREE && - (TREE_CONSTANT(typesize) && (!tree_to_uhwi(typesize) || - tree_to_uhwi(typesize) == tree_to_uhwi(elemsize)))) - return true; - return false; } diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 0572330bf8a78aed51877dfa7429aab23d55069d..a76925b46ce6309439ec0a554775dbbf2dd445cd 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base) static void sym_validate_range(struct symbol *sym) { struct property *prop; + struct symbol *range_sym; int base; long long val, val2; - char str[64]; switch (sym->type) { case S_INT: @@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym) if (!prop) return; val = strtoll(sym->curr.val, NULL, base); - val2 = sym_get_range_val(prop->expr->left.sym, base); + range_sym = prop->expr->left.sym; + val2 = sym_get_range_val(range_sym, base); if (val >= val2) { - val2 = sym_get_range_val(prop->expr->right.sym, base); + range_sym = prop->expr->right.sym; + val2 = sym_get_range_val(range_sym, base); if (val <= val2) return; } - if (sym->type == S_INT) - sprintf(str, "%lld", val2); - else - sprintf(str, "0x%llx", val2); - sym->curr.val = xstrdup(str); + sym->curr.val = range_sym->curr.val; } static void sym_set_changed(struct symbol *sym) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 973b5e5ae2dddc955b01bab3fff585d58421dbea..cb6406f485a960041db048a5f5e8ae5bfad40bb5 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1383,13 +1383,15 @@ static void section_rela(struct module *mod, struct elf_info *elf, const Elf_Rela *rela; for (rela = start; rela < stop; rela++) { + Elf_Sym *tsym; Elf_Addr taddr, r_offset; unsigned int r_type, r_sym; r_offset = TO_NATIVE(rela->r_offset); get_rel_type_and_sym(elf, rela->r_info, &r_type, &r_sym); - taddr = TO_NATIVE(rela->r_addend); + tsym = elf->symtab_start + r_sym; + taddr = tsym->st_value + TO_NATIVE(rela->r_addend); switch (elf->hdr->e_machine) { case EM_RISCV: @@ -1404,7 +1406,7 @@ static void section_rela(struct module *mod, struct elf_info *elf, break; } - check_section_mismatch(mod, elf, elf->symtab_start + r_sym, + check_section_mismatch(mod, elf, tsym, fsecndx, fromsec, r_offset, taddr); } } diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c index 757a4d193e0fb5919778847693a3353f2825265b..a9ef6d86de8397ece6aaebce969199294f39ce32 100644 --- a/sound/pci/hda/cs35l56_hda_i2c.c +++ b/sound/pci/hda/cs35l56_hda_i2c.c @@ -21,6 +21,10 @@ static int cs35l56_hda_i2c_probe(struct i2c_client *clt) return -ENOMEM; cs35l56->base.dev = &clt->dev; + +#ifdef CS35L56_WAKE_HOLD_TIME_US + cs35l56->base.can_hibernate = true; +#endif cs35l56->base.regmap = devm_regmap_init_i2c(clt, &cs35l56_regmap_i2c); if (IS_ERR(cs35l56->base.regmap)) { ret = PTR_ERR(cs35l56->base.regmap); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 03264915c618332a214fe1ee35678502625f8765..db90feb49c16e2ca0104be952efd32279837bd61 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2135,6 +2135,9 @@ static int azx_probe(struct pci_dev *pci, if (chip->driver_caps & AZX_DCAPS_I915_COMPONENT) { err = snd_hdac_i915_init(azx_bus(chip)); if (err < 0) { + if (err == -EPROBE_DEFER) + goto out_free; + /* if the controller is bound only with HDMI/DP * (for HSW and BDW), we need to abort the probe; * for other chips, still continue probing as other diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 669ae3d6e447e6058dfc2b35d188c8eb590f0d32..5618b1d9bfd130d1d99fa2d9feea6981267316a5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9832,6 +9832,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x890e, "HP 255 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8919, "HP Pavilion Aero Laptop 13-be0xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x896d, "HP ZBook Firefly 16 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -9867,6 +9868,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -9900,12 +9902,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -9944,13 +9950,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC), @@ -10821,22 +10831,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x17, 0x90170110}, {0x21, 0x03211020}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, - {0x14, 0x90170110}, - {0x21, 0x04211020}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, - {0x14, 0x90170110}, - {0x21, 0x04211030}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, - ALC295_STANDARD_PINS, - {0x17, 0x21014020}, - {0x18, 0x21a19030}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, - ALC295_STANDARD_PINS, - {0x17, 0x21014040}, - {0x18, 0x21a19050}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, - ALC295_STANDARD_PINS), SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ALC298_STANDARD_PINS, {0x17, 0x90170110}), @@ -10880,6 +10874,9 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, {0x19, 0x40000000}, {0x1b, 0x40000000}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, + {0x19, 0x40000000}, + {0x1b, 0x40000000}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x19, 0x40000000}, {0x1a, 0x40000000}), diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9a10512e340787decdeee31ca29b740d02e16ee2..7a334377f92b978fa642a0071b19f33d7e6fe74e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -211,9 +211,6 @@ int *fd_instr_count_percpu; struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; -/* Save original CPU model */ -unsigned int model_orig; - unsigned int num_iterations; unsigned int header_iterations; unsigned int debug; @@ -224,24 +221,16 @@ unsigned int rapl_joules; unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; -unsigned int do_snb_cstates; -unsigned int do_knl_cstates; -unsigned int do_slm_cstates; -unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; unsigned int has_turbo; unsigned int is_hybrid; -unsigned int do_irtl_snb; -unsigned int do_irtl_hsw; unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int authentic_amd; unsigned int hygon_genuine; unsigned int max_level, max_extended_level; unsigned int has_invariant_tsc; -unsigned int do_nhm_platform_info; -unsigned int no_MSR_MISC_PWR_MGMT; unsigned int aperf_mperf_multiplier = 1; double bclk; double base_hz; @@ -250,7 +239,6 @@ double tsc_tweak = 1.0; unsigned int show_pkg_only; unsigned int show_core_only; char *output_buffer, *outp; -unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; unsigned int do_ipc; @@ -261,65 +249,686 @@ unsigned int gfx_cur_mhz; unsigned int gfx_act_mhz; unsigned int tj_max; unsigned int tj_max_override; -int tcc_offset_bits; double rapl_power_units, rapl_time_units; double rapl_dram_energy_units, rapl_energy_units; double rapl_joule_counter_range; -unsigned int do_core_perf_limit_reasons; -unsigned int has_automatic_cstate_conversion; -unsigned int dis_cstate_prewake; -unsigned int do_gfx_perf_limit_reasons; -unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; -double discover_bclk(unsigned int family, unsigned int model); unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ -unsigned int has_misc_feature_control; unsigned int first_counter_read = 1; int ignore_stdin; -#define RAPL_PKG (1 << 0) - /* 0x610 MSR_PKG_POWER_LIMIT */ - /* 0x611 MSR_PKG_ENERGY_STATUS */ -#define RAPL_PKG_PERF_STATUS (1 << 1) - /* 0x613 MSR_PKG_PERF_STATUS */ -#define RAPL_PKG_POWER_INFO (1 << 2) - /* 0x614 MSR_PKG_POWER_INFO */ - -#define RAPL_DRAM (1 << 3) - /* 0x618 MSR_DRAM_POWER_LIMIT */ - /* 0x619 MSR_DRAM_ENERGY_STATUS */ -#define RAPL_DRAM_PERF_STATUS (1 << 4) - /* 0x61b MSR_DRAM_PERF_STATUS */ -#define RAPL_DRAM_POWER_INFO (1 << 5) - /* 0x61c MSR_DRAM_POWER_INFO */ - -#define RAPL_CORES_POWER_LIMIT (1 << 6) - /* 0x638 MSR_PP0_POWER_LIMIT */ -#define RAPL_CORE_POLICY (1 << 7) - /* 0x63a MSR_PP0_POLICY */ - -#define RAPL_GFX (1 << 8) - /* 0x640 MSR_PP1_POWER_LIMIT */ - /* 0x641 MSR_PP1_ENERGY_STATUS */ - /* 0x642 MSR_PP1_POLICY */ - -#define RAPL_CORES_ENERGY_STATUS (1 << 9) - /* 0x639 MSR_PP0_ENERGY_STATUS */ -#define RAPL_PER_CORE_ENERGY (1 << 10) - /* Indicates cores energy collection is per-core, - * not per-package. */ -#define RAPL_AMD_F17H (1 << 11) - /* 0xc0010299 MSR_RAPL_PWR_UNIT */ - /* 0xc001029a MSR_CORE_ENERGY_STAT */ - /* 0xc001029b MSR_PKG_ENERGY_STAT */ -#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT) +int get_msr(int cpu, off_t offset, unsigned long long *msr); + +/* Model specific support Start */ + +/* List of features that may diverge among different platforms */ +struct platform_features { + bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */ + bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */ + bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */ + bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */ + int bclk_freq; /* CPU base clock */ + int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */ + int supported_cstates; /* Core cstates and Package cstates supported */ + int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */ + bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */ + bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */ + bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */ + bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */ + bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */ + bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */ + bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */ + bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */ + bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ + int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ + int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */ + int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ + bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */ + bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ + bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ + int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */ + int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */ + bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */ + bool need_perf_multiplier; /* mperf/aperf multiplier */ +}; + +struct platform_data { + unsigned int model; + const struct platform_features *features; +}; + +/* For BCLK */ +enum bclk_freq { + BCLK_100MHZ = 1, + BCLK_133MHZ, + BCLK_SLV, +}; + +#define SLM_BCLK_FREQS 5 +double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; + +double slm_bclk(void) +{ + unsigned long long msr = 3; + unsigned int i; + double freq; + + if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) + fprintf(outf, "SLM BCLK: unknown\n"); + + i = msr & 0xf; + if (i >= SLM_BCLK_FREQS) { + fprintf(outf, "SLM BCLK[%d] invalid\n", i); + i = 3; + } + freq = slm_freq_table[i]; + + if (!quiet) + fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); + + return freq; +} + +/* For Package cstate limit */ +enum package_cstate_limit { + CST_LIMIT_NHM = 1, + CST_LIMIT_SNB, + CST_LIMIT_HSW, + CST_LIMIT_SKX, + CST_LIMIT_ICX, + CST_LIMIT_SLV, + CST_LIMIT_AMT, + CST_LIMIT_KNL, + CST_LIMIT_GMT, +}; + +/* For Turbo Ratio Limit MSRs */ +enum turbo_ratio_limit_msrs { + TRL_BASE = BIT(0), + TRL_LIMIT1 = BIT(1), + TRL_LIMIT2 = BIT(2), + TRL_ATOM = BIT(3), + TRL_KNL = BIT(4), + TRL_CORECOUNT = BIT(5), +}; + +/* For Perf Limit Reason MSRs */ +enum perf_limit_reason_msrs { + PLR_CORE = BIT(0), + PLR_GFX = BIT(1), + PLR_RING = BIT(2), +}; + +/* For RAPL MSRs */ +enum rapl_msrs { + RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */ + RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */ + RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */ + RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */ + RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */ + RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */ + RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */ + RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */ + RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */ + RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */ + RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */ + RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */ + RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */ + RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */ + RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ + RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ + RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ +}; + +#define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) +#define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) +#define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) +#define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) + +#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) +#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) +#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY) +#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY) + +#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT) + +/* For Cstates */ +enum cstates { + CC1 = BIT(0), + CC3 = BIT(1), + CC6 = BIT(2), + CC7 = BIT(3), + PC2 = BIT(4), + PC3 = BIT(5), + PC6 = BIT(6), + PC7 = BIT(7), + PC8 = BIT(8), + PC9 = BIT(9), + PC10 = BIT(10), +}; + +static const struct platform_features nhm_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_133MHZ, + .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_NHM, + .trl_msrs = TRL_BASE, +}; + +static const struct platform_features nhx_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_133MHZ, + .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_NHM, +}; + +static const struct platform_features snb_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_SNB, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features snx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_SNB, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, +}; + +static const struct platform_features ivb_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_SNB, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features ivx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_SNB, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE | TRL_LIMIT1, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, +}; + +static const struct platform_features hsw_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features hsx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, + .plr_msrs = PLR_CORE | PLR_RING, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, +}; + +static const struct platform_features hswl_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features hswg_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features bdw_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features bdwg_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features bdx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .has_cst_auto_convension = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, +}; + +static const struct platform_features skl_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .crystal_freq = 24000000, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .has_ext_cst_msrs = 1, + .trl_msrs = TRL_BASE, + .tcc_offset_bits = 6, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .enable_tsc_tweak = 1, +}; + +static const struct platform_features cnl_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .has_msr_core_c1_res = 1, + .has_ext_cst_msrs = 1, + .trl_msrs = TRL_BASE, + .tcc_offset_bits = 6, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .enable_tsc_tweak = 1, +}; + +static const struct platform_features adl_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, + .cst_limit = CST_LIMIT_HSW, + .has_irtl_msrs = 1, + .has_msr_core_c1_res = 1, + .has_ext_cst_msrs = 1, + .trl_msrs = TRL_BASE, + .tcc_offset_bits = 6, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .enable_tsc_tweak = 1, +}; + +static const struct platform_features skx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + .cst_limit = CST_LIMIT_SKX, + .has_irtl_msrs = 1, + .has_cst_auto_convension = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, +}; + +static const struct platform_features icx_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + .cst_limit = CST_LIMIT_ICX, + .has_irtl_msrs = 1, + .has_cst_prewake_bit = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, +}; + +static const struct platform_features spr_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + .cst_limit = CST_LIMIT_SKX, + .has_msr_core_c1_res = 1, + .has_irtl_msrs = 1, + .has_cst_prewake_bit = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, +}; + +static const struct platform_features srf_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + .cst_limit = CST_LIMIT_SKX, + .has_msr_core_c1_res = 1, + .has_msr_module_c6_res_ms = 1, + .has_irtl_msrs = 1, + .has_cst_prewake_bit = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, +}; + +static const struct platform_features grr_features = { + .has_msr_misc_feature_control = 1, + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6, + .cst_limit = CST_LIMIT_SKX, + .has_msr_core_c1_res = 1, + .has_msr_module_c6_res_ms = 1, + .has_irtl_msrs = 1, + .has_cst_prewake_bit = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, +}; + +static const struct platform_features slv_features = { + .has_nhm_msrs = 1, + .bclk_freq = BCLK_SLV, + .supported_cstates = CC1 | CC6 | PC6, + .cst_limit = CST_LIMIT_SLV, + .has_msr_core_c1_res = 1, + .has_msr_module_c6_res_ms = 1, + .has_msr_c6_demotion_policy_config = 1, + .has_msr_atom_pkg_c6_residency = 1, + .trl_msrs = TRL_ATOM, + .rapl_msrs = RAPL_PKG | RAPL_CORE, + .has_rapl_divisor = 1, + .rapl_quirk_tdp = 30, +}; + +static const struct platform_features slvd_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_SLV, + .supported_cstates = CC1 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_SLV, + .has_msr_atom_pkg_c6_residency = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_CORE, + .rapl_quirk_tdp = 30, +}; + +static const struct platform_features amt_features = { + .has_nhm_msrs = 1, + .bclk_freq = BCLK_133MHZ, + .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_AMT, + .trl_msrs = TRL_BASE, +}; + +static const struct platform_features gmt_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .crystal_freq = 19200000, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features gmtd_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .crystal_freq = 25000000, + .supported_cstates = CC1 | CC6 | PC2 | PC6, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .has_msr_core_c1_res = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, +}; + +static const struct platform_features gmtp_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .crystal_freq = 19200000, + .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, +}; + +static const struct platform_features tmt_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .enable_tsc_tweak = 1, +}; + +static const struct platform_features tmtd_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6, + .cst_limit = CST_LIMIT_GMT, + .has_irtl_msrs = 1, + .trl_msrs = TRL_BASE | TRL_CORECOUNT, + .rapl_msrs = RAPL_PKG_ALL, +}; + +static const struct platform_features knl_features = { + .has_msr_misc_pwr_mgmt = 1, + .has_nhm_msrs = 1, + .has_config_tdp = 1, + .bclk_freq = BCLK_100MHZ, + .supported_cstates = CC1 | CC6 | PC3 | PC6, + .cst_limit = CST_LIMIT_KNL, + .has_msr_knl_core_c6_residency = 1, + .trl_msrs = TRL_KNL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .has_fixed_rapl_unit = 1, + .need_perf_multiplier = 1, +}; + +static const struct platform_features default_features = { +}; + +static const struct platform_features amd_features_with_rapl = { + .rapl_msrs = RAPL_AMD_F17H, + .has_per_core_rapl = 1, + .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ +}; + +static const struct platform_data turbostat_pdata[] = { + { INTEL_FAM6_NEHALEM, &nhm_features }, + { INTEL_FAM6_NEHALEM_G, &nhm_features }, + { INTEL_FAM6_NEHALEM_EP, &nhm_features }, + { INTEL_FAM6_NEHALEM_EX, &nhx_features }, + { INTEL_FAM6_WESTMERE, &nhm_features }, + { INTEL_FAM6_WESTMERE_EP, &nhm_features }, + { INTEL_FAM6_WESTMERE_EX, &nhx_features }, + { INTEL_FAM6_SANDYBRIDGE, &snb_features }, + { INTEL_FAM6_SANDYBRIDGE_X, &snx_features }, + { INTEL_FAM6_IVYBRIDGE, &ivb_features }, + { INTEL_FAM6_IVYBRIDGE_X, &ivx_features }, + { INTEL_FAM6_HASWELL, &hsw_features }, + { INTEL_FAM6_HASWELL_X, &hsx_features }, + { INTEL_FAM6_HASWELL_L, &hswl_features }, + { INTEL_FAM6_HASWELL_G, &hswg_features }, + { INTEL_FAM6_BROADWELL, &bdw_features }, + { INTEL_FAM6_BROADWELL_G, &bdwg_features }, + { INTEL_FAM6_BROADWELL_X, &bdx_features }, + { INTEL_FAM6_BROADWELL_D, &bdx_features }, + { INTEL_FAM6_SKYLAKE_L, &skl_features }, + { INTEL_FAM6_SKYLAKE, &skl_features }, + { INTEL_FAM6_SKYLAKE_X, &skx_features }, + { INTEL_FAM6_KABYLAKE_L, &skl_features }, + { INTEL_FAM6_KABYLAKE, &skl_features }, + { INTEL_FAM6_COMETLAKE, &skl_features }, + { INTEL_FAM6_COMETLAKE_L, &skl_features }, + { INTEL_FAM6_CANNONLAKE_L, &cnl_features }, + { INTEL_FAM6_ICELAKE_X, &icx_features }, + { INTEL_FAM6_ICELAKE_D, &icx_features }, + { INTEL_FAM6_ICELAKE_L, &cnl_features }, + { INTEL_FAM6_ICELAKE_NNPI, &cnl_features }, + { INTEL_FAM6_ROCKETLAKE, &cnl_features }, + { INTEL_FAM6_TIGERLAKE_L, &cnl_features }, + { INTEL_FAM6_TIGERLAKE, &cnl_features }, + { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features }, + { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features }, + { INTEL_FAM6_GRANITERAPIDS_X, &spr_features }, + { INTEL_FAM6_LAKEFIELD, &cnl_features }, + { INTEL_FAM6_ALDERLAKE, &adl_features }, + { INTEL_FAM6_ALDERLAKE_L, &adl_features }, + { INTEL_FAM6_RAPTORLAKE, &adl_features }, + { INTEL_FAM6_RAPTORLAKE_P, &adl_features }, + { INTEL_FAM6_RAPTORLAKE_S, &adl_features }, + { INTEL_FAM6_METEORLAKE, &cnl_features }, + { INTEL_FAM6_METEORLAKE_L, &cnl_features }, + { INTEL_FAM6_ARROWLAKE, &cnl_features }, + { INTEL_FAM6_LUNARLAKE_M, &cnl_features }, + { INTEL_FAM6_ATOM_SILVERMONT, &slv_features }, + { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features }, + { INTEL_FAM6_ATOM_AIRMONT, &amt_features }, + { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features }, + { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features }, + { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features }, + { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features }, + { INTEL_FAM6_ATOM_TREMONT, &tmt_features }, + { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features }, + { INTEL_FAM6_ATOM_GRACEMONT, &adl_features }, + { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features }, + { INTEL_FAM6_ATOM_CRESTMONT, &grr_features }, + { INTEL_FAM6_XEON_PHI_KNL, &knl_features }, + { INTEL_FAM6_XEON_PHI_KNM, &knl_features }, + /* + * Missing support for + * INTEL_FAM6_ICELAKE + * INTEL_FAM6_ATOM_SILVERMONT_MID + * INTEL_FAM6_ATOM_AIRMONT_MID + * INTEL_FAM6_ATOM_AIRMONT_NP + */ + { 0, NULL }, +}; + +static const struct platform_features *platform; + +void probe_platform_features(unsigned int family, unsigned int model) +{ + int i; + + platform = &default_features; + + if (authentic_amd || hygon_genuine) { + if (max_extended_level >= 0x80000007) { + unsigned int eax, ebx, ecx, edx; + + __cpuid(0x80000007, eax, ebx, ecx, edx); + /* RAPL (Fam 17h+) */ + if ((edx & (1 << 14)) && family >= 0x17) + platform = &amd_features_with_rapl; + } + return; + } + + if (!genuine_intel || family != 6) + return; + + for (i = 0; turbostat_pdata[i].features; i++) { + if (turbostat_pdata[i].model == model) { + platform = turbostat_pdata[i].features; + return; + } + } +} + +/* Model specific support End */ + #define TJMAX_DEFAULT 100 /* MSRs that are not yet in the kernel-provided header. */ @@ -333,8 +942,8 @@ int backwards_count; char *progname; #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ -cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; -size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; +cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; +size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_COUNTERS 8 #define MAX_ADDED_THREAD_COUNTERS 24 #define BITMASK_SIZE 32 @@ -355,12 +964,11 @@ struct thread_data { unsigned int x2apic_id; unsigned int flags; bool is_atom; -#define CPU_IS_FIRST_THREAD_IN_CORE 0x2 -#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; } *thread_even, *thread_odd; struct core_data { + int base_cpu; unsigned long long c3; unsigned long long c6; unsigned long long c7; @@ -373,6 +981,7 @@ struct core_data { } *core_even, *core_odd; struct pkg_data { + int base_cpu; unsigned long long pc2; unsigned long long pc3; unsigned long long pc6; @@ -456,7 +1065,7 @@ off_t idx_to_offset(int idx) switch (idx) { case IDX_PKG_ENERGY: - if (do_rapl & RAPL_AMD_F17H) + if (platform->rapl_msrs & RAPL_AMD_F17H) offset = MSR_PKG_ENERGY_STAT; else offset = MSR_PKG_ENERGY_STATUS; @@ -516,17 +1125,17 @@ int idx_valid(int idx) { switch (idx) { case IDX_PKG_ENERGY: - return do_rapl & (RAPL_PKG | RAPL_AMD_F17H); + return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); case IDX_DRAM_ENERGY: - return do_rapl & RAPL_DRAM; + return platform->rapl_msrs & RAPL_DRAM; case IDX_PP0_ENERGY: - return do_rapl & RAPL_CORES_ENERGY_STATUS; + return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; case IDX_PP1_ENERGY: - return do_rapl & RAPL_GFX; + return platform->rapl_msrs & RAPL_GFX; case IDX_PKG_PERF: - return do_rapl & RAPL_PKG_PERF_STATUS; + return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; case IDX_DRAM_PERF: - return do_rapl & RAPL_DRAM_PERF_STATUS; + return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; default: return 0; } @@ -563,6 +1172,9 @@ struct topo_params { int num_die; int num_cpus; int num_cores; + int allowed_packages; + int allowed_cpus; + int allowed_cores; int max_cpu_num; int max_node_num; int nodes_per_pkg; @@ -575,7 +1187,7 @@ struct timeval tv_even, tv_odd, tv_delta; int *irq_column_2_cpu; /* /proc/interrupts column numbers */ int *irqs_per_cpu; /* indexed by cpu_num */ -void setup_all_buffers(void); +void setup_all_buffers(bool startup); char *sys_lpi_file; char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; @@ -586,6 +1198,11 @@ int cpu_is_not_present(int cpu) return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); } +int cpu_is_not_allowed(int cpu) +{ + return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set); +} + /* * run func(thread, core, package) in topology order * skip non-present cpus @@ -603,10 +1220,9 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk struct thread_data *t; struct core_data *c; struct pkg_data *p; - t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); - if (cpu_is_not_present(t->cpu_id)) + if (cpu_is_not_allowed(t->cpu_id)) continue; c = GET_CORE(core_base, core_no, node_no, pkg_no); @@ -622,6 +1238,25 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk return 0; } +int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + UNUSED(p); + + return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); +} + +int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + UNUSED(c); + + return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); +} + +int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); +} + int cpu_migrate(int cpu) { CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); @@ -904,11 +1539,11 @@ void print_header(char *delim) if (DO_BIC(BIC_CORE_THROT_CNT)) outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); - if (do_rapl && !rapl_joules) { - if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + if (platform->rapl_msrs && !rapl_joules) { + if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); - } else if (do_rapl && rapl_joules) { - if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + } else if (platform->rapl_msrs && rapl_joules) { + if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); } @@ -966,10 +1601,10 @@ void print_header(char *delim) if (DO_BIC(BIC_SYS_LPI)) outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); - if (do_rapl && !rapl_joules) { + if (platform->rapl_msrs && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); - if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); @@ -979,10 +1614,10 @@ void print_header(char *delim) outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); if (DO_BIC(BIC_RAM__)) outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); - } else if (do_rapl && rapl_joules) { + } else if (platform->rapl_msrs && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); - if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); @@ -1106,11 +1741,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data int printed = 0; /* if showing only 1st thread in core and this isn't one, bail out */ - if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) return 0; /* if showing only 1st thread in pkg and this isn't one, bail out */ - if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) return 0; /*if not summary line and --cpu is used */ @@ -1244,7 +1879,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); /* print per-core data only for 1st thread in core */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + if (!is_cpu_first_thread_in_core(t, c, p)) goto done; if (DO_BIC(BIC_CPU_c3)) @@ -1284,14 +1919,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data fmt8 = "%s%.2f"; - if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); - if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units); /* print per-package data only for 1st core in package */ - if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_core_in_package(t, c, p)) goto done; /* PkgTmp */ @@ -1352,7 +1987,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); - if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); if (DO_BIC(BIC_GFXWatt)) @@ -1364,7 +1999,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->energy_dram * rapl_dram_energy_units / interval_float); if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); - if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units); @@ -1527,7 +2162,7 @@ void delta_core(struct core_data *new, struct core_data *old) int soft_c1_residency_display(int bic) { - if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr) + if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res) return 0; return DO_BIC_READ(bic); @@ -1567,7 +2202,8 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d old->c1 = new->c1 - old->c1; - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) + || soft_c1_residency_display(BIC_Avg_MHz)) { if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { old->aperf = new->aperf - old->aperf; old->mperf = new->mperf - old->mperf; @@ -1576,7 +2212,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d } } - if (use_c1_residency_msr) { + if (platform->has_msr_core_c1_res) { /* * Some models have a dedicated C1 residency MSR, * which should be more accurate than the derivation below. @@ -1626,7 +2262,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c, int retval = 0; /* calculate core delta only for 1st thread in core */ - if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) + if (is_cpu_first_thread_in_core(t, c, p)) delta_core(c, c2); /* always calculate thread delta */ @@ -1635,7 +2271,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c, return retval; /* calculate package delta only for 1st core in package */ - if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) + if (is_cpu_first_core_in_package(t, c, p)) retval = delta_package(p, p2); return retval; @@ -1663,9 +2299,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->irq_count = 0; t->smi_count = 0; - /* tells format_counters to dump all fields from this set */ - t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; - c->c3 = 0; c->c6 = 0; c->c7 = 0; @@ -1749,7 +2382,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* sum per-core values only for 1st thread in core */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + if (!is_cpu_first_thread_in_core(t, c, p)) return 0; average.cores.c3 += c->c3; @@ -1769,7 +2402,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* sum per-pkg values only for 1st core in pkg */ - if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_core_in_package(t, c, p)) return 0; if (DO_BIC(BIC_Totl_c0)) @@ -1834,40 +2467,40 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data /* Use the global time delta for the average. */ average.threads.tv_delta = tv_delta; - average.threads.tsc /= topo.num_cpus; - average.threads.aperf /= topo.num_cpus; - average.threads.mperf /= topo.num_cpus; - average.threads.instr_count /= topo.num_cpus; - average.threads.c1 /= topo.num_cpus; + average.threads.tsc /= topo.allowed_cpus; + average.threads.aperf /= topo.allowed_cpus; + average.threads.mperf /= topo.allowed_cpus; + average.threads.instr_count /= topo.allowed_cpus; + average.threads.c1 /= topo.allowed_cpus; if (average.threads.irq_count > 9999999) sums_need_wide_columns = 1; - average.cores.c3 /= topo.num_cores; - average.cores.c6 /= topo.num_cores; - average.cores.c7 /= topo.num_cores; - average.cores.mc6_us /= topo.num_cores; + average.cores.c3 /= topo.allowed_cores; + average.cores.c6 /= topo.allowed_cores; + average.cores.c7 /= topo.allowed_cores; + average.cores.mc6_us /= topo.allowed_cores; if (DO_BIC(BIC_Totl_c0)) - average.packages.pkg_wtd_core_c0 /= topo.num_packages; + average.packages.pkg_wtd_core_c0 /= topo.allowed_packages; if (DO_BIC(BIC_Any_c0)) - average.packages.pkg_any_core_c0 /= topo.num_packages; + average.packages.pkg_any_core_c0 /= topo.allowed_packages; if (DO_BIC(BIC_GFX_c0)) - average.packages.pkg_any_gfxe_c0 /= topo.num_packages; + average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages; if (DO_BIC(BIC_CPUGFX)) - average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages; + average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages; - average.packages.pc2 /= topo.num_packages; + average.packages.pc2 /= topo.allowed_packages; if (DO_BIC(BIC_Pkgpc3)) - average.packages.pc3 /= topo.num_packages; + average.packages.pc3 /= topo.allowed_packages; if (DO_BIC(BIC_Pkgpc6)) - average.packages.pc6 /= topo.num_packages; + average.packages.pc6 /= topo.allowed_packages; if (DO_BIC(BIC_Pkgpc7)) - average.packages.pc7 /= topo.num_packages; + average.packages.pc7 /= topo.allowed_packages; - average.packages.pc8 /= topo.num_packages; - average.packages.pc9 /= topo.num_packages; - average.packages.pc10 /= topo.num_packages; + average.packages.pc8 /= topo.allowed_packages; + average.packages.pc9 /= topo.allowed_packages; + average.packages.pc10 /= topo.allowed_packages; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1877,7 +2510,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data sums_need_wide_columns = 1; continue; } - average.threads.counter[i] /= topo.num_cpus; + average.threads.counter[i] /= topo.allowed_cpus; } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1886,7 +2519,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data if (average.cores.counter[i] > 9999999) sums_need_wide_columns = 1; } - average.cores.counter[i] /= topo.num_cores; + average.cores.counter[i] /= topo.allowed_cores; } for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1895,7 +2528,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data if (average.packages.counter[i] > 9999999) sums_need_wide_columns = 1; } - average.packages.counter[i] /= topo.num_packages; + average.packages.counter[i] /= topo.allowed_packages; } } @@ -2092,7 +2725,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) + || soft_c1_residency_display(BIC_Avg_MHz)) { unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; /* @@ -2158,7 +2792,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -5; t->smi_count = msr & 0xFFFFFFFF; } - if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) { + if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) { if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) return -6; } @@ -2169,7 +2803,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* collect core counters only for 1st thread in core */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + if (!is_cpu_first_thread_in_core(t, c, p)) goto done; if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) { @@ -2177,10 +2811,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -6; } - if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) { + if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; - } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) { + } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) { if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) return -7; } @@ -2212,7 +2846,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (DO_BIC(BIC_CORE_THROT_CNT)) get_core_throt_cnt(cpu, &c->core_throt_cnt); - if (do_rapl & RAPL_AMD_F17H) { + if (platform->rapl_msrs & RAPL_AMD_F17H) { if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) return -14; c->core_energy = msr & 0xFFFFFFFF; @@ -2224,7 +2858,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* collect package counters only for 1st core in package */ - if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_core_in_package(t, c, p)) goto done; if (DO_BIC(BIC_Totl_c0)) { @@ -2247,7 +2881,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; if (DO_BIC(BIC_Pkgpc6)) { - if (do_slm_cstates) { + if (platform->has_msr_atom_pkg_c6_residency) { if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) return -10; } else { @@ -2277,37 +2911,37 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (DO_BIC(BIC_SYS_LPI)) p->sys_lpi = cpuidle_cur_sys_lpi_us; - if (do_rapl & RAPL_PKG) { + if (platform->rapl_msrs & RAPL_PKG) { if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; p->energy_pkg = msr; } - if (do_rapl & RAPL_CORES_ENERGY_STATUS) { + if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) { if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr)) return -14; p->energy_cores = msr; } - if (do_rapl & RAPL_DRAM) { + if (platform->rapl_msrs & RAPL_DRAM) { if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) return -15; p->energy_dram = msr; } - if (do_rapl & RAPL_GFX) { + if (platform->rapl_msrs & RAPL_GFX) { if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr)) return -16; p->energy_gfx = msr; } - if (do_rapl & RAPL_PKG_PERF_STATUS) { + if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) { if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr)) return -16; p->rapl_pkg_perf_status = msr; } - if (do_rapl & RAPL_DRAM_PERF_STATUS) { + if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) { if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr)) return -16; p->rapl_dram_perf_status = msr; } - if (do_rapl & RAPL_AMD_F17H) { + if (platform->rapl_msrs & RAPL_AMD_F17H) { if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr)) return -13; p->energy_pkg = msr; @@ -2414,18 +3048,58 @@ int icx_pkg_cstate_limits[16] = PCLRSV, PCLRSV }; -static void calculate_tsc_tweak() +void probe_cst_limit(void) { - tsc_tweak = base_hz / tsc_hz; -} + unsigned long long msr; + int *pkg_cstate_limits; + + if (!platform->has_nhm_msrs) + return; + + switch (platform->cst_limit) { + case CST_LIMIT_NHM: + pkg_cstate_limits = nhm_pkg_cstate_limits; + break; + case CST_LIMIT_SNB: + pkg_cstate_limits = snb_pkg_cstate_limits; + break; + case CST_LIMIT_HSW: + pkg_cstate_limits = hsw_pkg_cstate_limits; + break; + case CST_LIMIT_SKX: + pkg_cstate_limits = skx_pkg_cstate_limits; + break; + case CST_LIMIT_ICX: + pkg_cstate_limits = icx_pkg_cstate_limits; + break; + case CST_LIMIT_SLV: + pkg_cstate_limits = slv_pkg_cstate_limits; + break; + case CST_LIMIT_AMT: + pkg_cstate_limits = amt_pkg_cstate_limits; + break; + case CST_LIMIT_KNL: + pkg_cstate_limits = phi_pkg_cstate_limits; + break; + case CST_LIMIT_GMT: + pkg_cstate_limits = glm_pkg_cstate_limits; + break; + default: + return; + } -void prewake_cstate_probe(unsigned int family, unsigned int model); + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); + pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; +} -static void dump_nhm_platform_info(void) +static void dump_platform_info(void) { unsigned long long msr; unsigned int ratio; + if (!platform->has_nhm_msrs) + return; + get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); @@ -2435,19 +3109,27 @@ static void dump_nhm_platform_info(void) ratio = (msr >> 8) & 0xFF; fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); +} + +static void dump_power_ctl(void) +{ + unsigned long long msr; + + if (!platform->has_nhm_msrs) + return; get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ - if (dis_cstate_prewake) + if (platform->has_cst_prewake_bit) fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN"); return; } -static void dump_hsw_turbo_ratio_limits(void) +static void dump_turbo_ratio_limit2(void) { unsigned long long msr; unsigned int ratio; @@ -2466,7 +3148,7 @@ static void dump_hsw_turbo_ratio_limits(void) return; } -static void dump_ivt_turbo_ratio_limits(void) +static void dump_turbo_ratio_limit1(void) { unsigned long long msr; unsigned int ratio; @@ -2509,29 +3191,7 @@ static void dump_ivt_turbo_ratio_limits(void) return; } -int has_turbo_ratio_group_limits(int family, int model) -{ - - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_ATOM_GOLDMONT_D: - case INTEL_FAM6_ATOM_TREMONT_D: - return 1; - default: - return 0; - } -} - -static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model) +static void dump_turbo_ratio_limits(int trl_msr_offset) { unsigned long long msr, core_counts; int shift; @@ -2540,7 +3200,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model) fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr); - if (has_turbo_ratio_group_limits(family, model)) { + if (platform->trl_msrs & TRL_CORECOUNT) { get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); } else { @@ -2657,10 +3317,13 @@ static void dump_knl_turbo_ratio_limits(void) ratio[i], bclk, ratio[i] * bclk, cores[i]); } -static void dump_nhm_cst_cfg(void) +static void dump_cst_cfg(void) { unsigned long long msr; + if (!platform->has_nhm_msrs) + return; + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); @@ -2673,7 +3336,7 @@ static void dump_nhm_cst_cfg(void) (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) - if (has_automatic_cstate_conversion) { + if (platform->has_cst_auto_convension) { fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); } @@ -2730,39 +3393,50 @@ void print_irtl(void) { unsigned long long msr; - get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); - - get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (!platform->has_irtl_msrs) + return; - get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (platform->supported_cstates & PC3) { + get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } - if (!do_irtl_hsw) - return; + if (platform->supported_cstates & PC6) { + get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } - get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (platform->supported_cstates & PC7) { + get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } - get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (platform->supported_cstates & PC8) { + get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } - get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); - fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + if (platform->supported_cstates & PC9) { + get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } + if (platform->supported_cstates & PC10) { + get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + } } void free_fd_percpu(void) @@ -2785,6 +3459,14 @@ void free_all_buffers(void) cpu_present_set = NULL; cpu_present_setsize = 0; + CPU_FREE(cpu_effective_set); + cpu_effective_set = NULL; + cpu_effective_setsize = 0; + + CPU_FREE(cpu_allowed_set); + cpu_allowed_set = NULL; + cpu_allowed_setsize = 0; + CPU_FREE(cpu_affinity_set); cpu_affinity_set = NULL; cpu_affinity_setsize = 0; @@ -2927,49 +3609,102 @@ int get_physical_node_id(struct cpu_topology *thiscpu) return -1; } -int get_thread_siblings(struct cpu_topology *thiscpu) +static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) { - char path[80], character; - FILE *filep; - unsigned long map; - int so, shift, sib_core; - int cpu = thiscpu->logical_cpu_id; - int offset = topo.max_cpu_num + 1; - size_t size; - int thread_id = 0; + unsigned int start, end; + char *next = cpu_str; - thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); - if (thiscpu->thread_id < 0) - thiscpu->thread_id = thread_id++; - if (!thiscpu->put_ids) - return -1; + while (next && *next) { - size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); - CPU_ZERO_S(size, thiscpu->put_ids); + if (*next == '-') /* no negative cpu numbers */ + return 1; - sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); - filep = fopen(path, "r"); + start = strtoul(next, &next, 10); - if (!filep) { - warnx("%s: open failed", path); - return -1; - } - do { - offset -= BITMASK_SIZE; - if (fscanf(filep, "%lx%c", &map, &character) != 2) - err(1, "%s: failed to parse file", path); - for (shift = 0; shift < BITMASK_SIZE; shift++) { - if ((map >> shift) & 0x1) { - so = shift + offset; - sib_core = get_core_id(so); - if (sib_core == thiscpu->physical_core_id) { - CPU_SET_S(so, size, thiscpu->put_ids); - if ((so != cpu) && (cpus[so].thread_id < 0)) - cpus[so].thread_id = thread_id++; - } - } - } - } while (character == ','); + if (start >= CPU_SUBSET_MAXCPUS) + return 1; + CPU_SET_S(start, cpu_set_size, cpu_set); + + if (*next == '\0' || *next == '\n') + break; + + if (*next == ',') { + next += 1; + continue; + } + + if (*next == '-') { + next += 1; /* start range */ + } else if (*next == '.') { + next += 1; + if (*next == '.') + next += 1; /* start range */ + else + return 1; + } + + end = strtoul(next, &next, 10); + if (end <= start) + return 1; + + while (++start <= end) { + if (start >= CPU_SUBSET_MAXCPUS) + return 1; + CPU_SET_S(start, cpu_set_size, cpu_set); + } + + if (*next == ',') + next += 1; + else if (*next != '\0' && *next != '\n') + return 1; + } + + return 0; +} + +int get_thread_siblings(struct cpu_topology *thiscpu) +{ + char path[80], character; + FILE *filep; + unsigned long map; + int so, shift, sib_core; + int cpu = thiscpu->logical_cpu_id; + int offset = topo.max_cpu_num + 1; + size_t size; + int thread_id = 0; + + thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); + if (thiscpu->thread_id < 0) + thiscpu->thread_id = thread_id++; + if (!thiscpu->put_ids) + return -1; + + size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(size, thiscpu->put_ids); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); + filep = fopen(path, "r"); + + if (!filep) { + warnx("%s: open failed", path); + return -1; + } + do { + offset -= BITMASK_SIZE; + if (fscanf(filep, "%lx%c", &map, &character) != 2) + err(1, "%s: failed to parse file", path); + for (shift = 0; shift < BITMASK_SIZE; shift++) { + if ((map >> shift) & 0x1) { + so = shift + offset; + sib_core = get_core_id(so); + if (sib_core == thiscpu->physical_core_id) { + CPU_SET_S(so, size, thiscpu->put_ids); + if ((so != cpu) && (cpus[so].thread_id < 0)) + cpus[so].thread_id = thread_id++; + } + } + } + } while (character == ','); fclose(filep); return CPU_COUNT_S(size, thiscpu->put_ids); @@ -2998,7 +3733,7 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); - if (cpu_is_not_present(t->cpu_id)) + if (cpu_is_not_allowed(t->cpu_id)) continue; t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); @@ -3050,11 +3785,51 @@ int for_all_proc_cpus(int (func) (int)) return 0; } +#define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective" + +static char cpu_effective_str[1024]; + +static int update_effective_str(bool startup) +{ + FILE *fp; + char *pos; + char buf[1024]; + int ret; + + if (cpu_effective_str[0] == '\0' && !startup) + return 0; + + fp = fopen(PATH_EFFECTIVE_CPUS, "r"); + if (!fp) + return 0; + + pos = fgets(buf, 1024, fp); + if (!pos) + err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS); + + fclose(fp); + + ret = strncmp(cpu_effective_str, buf, 1024); + if (!ret) + return 0; + + strncpy(cpu_effective_str, buf, 1024); + return 1; +} + +static void update_effective_set(bool startup) +{ + update_effective_str(startup); + + if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize)) + err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str); +} + void re_initialize(void) { free_all_buffers(); - setup_all_buffers(); - fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); + setup_all_buffers(false); + fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } void set_max_cpu_num(void) @@ -3191,8 +3966,8 @@ int snapshot_gfx_rc6_ms(void) /* * snapshot_gfx_mhz() * - * record snapshot of - * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz + * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz + * when /sys/class/drm/card0/gt_cur_freq_mhz is not available. * * return 1 if config change requires a restart, else return 0 */ @@ -3201,9 +3976,11 @@ int snapshot_gfx_mhz(void) static FILE *fp; int retval; - if (fp == NULL) - fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); - else { + if (fp == NULL) { + fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r"); + if (!fp) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); + } else { rewind(fp); fflush(fp); } @@ -3218,8 +3995,8 @@ int snapshot_gfx_mhz(void) /* * snapshot_gfx_cur_mhz() * - * record snapshot of - * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz + * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz + * when /sys/class/drm/card0/gt_act_freq_mhz is not available. * * return 1 if config change requires a restart, else return 0 */ @@ -3228,9 +4005,11 @@ int snapshot_gfx_act_mhz(void) static FILE *fp; int retval; - if (fp == NULL) - fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); - else { + if (fp == NULL) { + fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r"); + if (!fp) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); + } else { rewind(fp); fflush(fp); } @@ -3562,6 +4341,10 @@ void turbostat_loop() re_initialize(); goto restart; } + if (update_effective_str(false)) { + re_initialize(); + goto restart; + } do_sleep(); if (snapshot_proc_sysfs_files()) goto restart; @@ -3674,500 +4457,133 @@ void check_permissions(void) exit(-6); } -/* - * NHM adds support for additional MSRs: - * - * MSR_SMI_COUNT 0x00000034 - * - * MSR_PLATFORM_INFO 0x000000ce - * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 - * - * MSR_MISC_PWR_MGMT 0x000001aa - * - * MSR_PKG_C3_RESIDENCY 0x000003f8 - * MSR_PKG_C6_RESIDENCY 0x000003f9 - * MSR_CORE_C3_RESIDENCY 0x000003fc - * MSR_CORE_C6_RESIDENCY 0x000003fd - * - * Side effect: - * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL - * sets has_misc_feature_control - */ -int probe_nhm_msrs(unsigned int family, unsigned int model) +void probe_bclk(void) { unsigned long long msr; unsigned int base_ratio; - int *pkg_cstate_limits; - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - bclk = discover_bclk(family, model); + if (!platform->has_nhm_msrs) + return; - switch (model) { - case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ - case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ - pkg_cstate_limits = nhm_pkg_cstate_limits; - break; - case INTEL_FAM6_SANDYBRIDGE: /* SNB */ - case INTEL_FAM6_SANDYBRIDGE_X: /* SNB Xeon */ - case INTEL_FAM6_IVYBRIDGE: /* IVB */ - case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ - pkg_cstate_limits = snb_pkg_cstate_limits; - has_misc_feature_control = 1; - break; - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - case INTEL_FAM6_HASWELL_X: /* HSX */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_BROADWELL_G: /* BDW */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - pkg_cstate_limits = hsw_pkg_cstate_limits; - has_misc_feature_control = 1; - break; - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ - pkg_cstate_limits = skx_pkg_cstate_limits; - has_misc_feature_control = 1; - break; - case INTEL_FAM6_ICELAKE_X: /* ICX */ - pkg_cstate_limits = icx_pkg_cstate_limits; - has_misc_feature_control = 1; - break; - case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ - no_MSR_MISC_PWR_MGMT = 1; - /* FALLTHRU */ - case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ - pkg_cstate_limits = slv_pkg_cstate_limits; - break; - case INTEL_FAM6_ATOM_AIRMONT: /* AMT */ - pkg_cstate_limits = amt_pkg_cstate_limits; - no_MSR_MISC_PWR_MGMT = 1; - break; - case INTEL_FAM6_XEON_PHI_KNL: /* PHI */ - pkg_cstate_limits = phi_pkg_cstate_limits; - break; - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ - pkg_cstate_limits = glm_pkg_cstate_limits; - break; - default: - return 0; - } - get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); - pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; + if (platform->bclk_freq == BCLK_100MHZ) + bclk = 100.00; + else if (platform->bclk_freq == BCLK_133MHZ) + bclk = 133.33; + else if (platform->bclk_freq == BCLK_SLV) + bclk = slm_bclk(); + else + return; get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); base_ratio = (msr >> 8) & 0xFF; base_hz = base_ratio * bclk * 1000000; has_base_hz = 1; - return 1; -} -/* - * SLV client has support for unique MSRs: - * - * MSR_CC6_DEMOTION_POLICY_CONFIG - * MSR_MC6_DEMOTION_POLICY_CONFIG - */ + if (platform->enable_tsc_tweak) + tsc_tweak = base_hz / tsc_hz; +} -int has_slv_msrs(unsigned int family, unsigned int model) +static void remove_underbar(char *s) { - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; + char *to = s; - switch (model) { - case INTEL_FAM6_ATOM_SILVERMONT: - case INTEL_FAM6_ATOM_SILVERMONT_MID: - case INTEL_FAM6_ATOM_AIRMONT_MID: - return 1; + while (*s) { + if (*s != '_') + *to++ = *s; + s++; } - return 0; + + *to = 0; } -int is_dnv(unsigned int family, unsigned int model) +static void dump_turbo_ratio_info(void) { + if (!has_turbo) + return; - if (!genuine_intel) - return 0; + if (!platform->has_nhm_msrs) + return; - if (family != 6) - return 0; + if (platform->trl_msrs & TRL_LIMIT2) + dump_turbo_ratio_limit2(); - switch (model) { - case INTEL_FAM6_ATOM_GOLDMONT_D: - return 1; - } - return 0; -} + if (platform->trl_msrs & TRL_LIMIT1) + dump_turbo_ratio_limit1(); -int is_bdx(unsigned int family, unsigned int model) -{ + if (platform->trl_msrs & TRL_BASE) { + dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT); - if (!genuine_intel) - return 0; + if (is_hybrid) + dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT); + } - if (family != 6) - return 0; + if (platform->trl_msrs & TRL_ATOM) + dump_atom_turbo_ratio_limits(); - switch (model) { - case INTEL_FAM6_BROADWELL_X: - return 1; - } - return 0; + if (platform->trl_msrs & TRL_KNL) + dump_knl_turbo_ratio_limits(); + + if (platform->has_config_tdp) + dump_config_tdp(); } -int is_skx(unsigned int family, unsigned int model) +static int read_sysfs_int(char *path) { + FILE *input; + int retval = -1; - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_SKYLAKE_X: - return 1; + input = fopen(path, "r"); + if (input == NULL) { + if (debug) + fprintf(outf, "NSFOD %s\n", path); + return (-1); } - return 0; + if (fscanf(input, "%d", &retval) != 1) + err(1, "%s: failed to read int from file", path); + fclose(input); + + return (retval); } -int is_icx(unsigned int family, unsigned int model) +static void dump_sysfs_file(char *path) { + FILE *input; + char cpuidle_buf[64]; - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ICELAKE_X: - return 1; + input = fopen(path, "r"); + if (input == NULL) { + if (debug) + fprintf(outf, "NSFOD %s\n", path); + return; } - return 0; + if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) + err(1, "%s: failed to read file", path); + fclose(input); + + fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); } -int is_spr(unsigned int family, unsigned int model) +static void probe_intel_uncore_frequency(void) { + int i, j; + char path[128]; if (!genuine_intel) - return 0; + return; - if (family != 6) - return 0; + if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK)) + return; - switch (model) { - case INTEL_FAM6_SAPPHIRERAPIDS_X: - return 1; - } - return 0; -} + /* Cluster level sysfs not supported yet. */ + if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK)) + return; -int is_ehl(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; + if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK)) + BIC_PRESENT(BIC_UNCORE_MHZ); - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_TREMONT: - return 1; - } - return 0; -} - -int is_jvl(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_TREMONT_D: - return 1; - } - return 0; -} - -int has_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (has_slv_msrs(family, model)) - return 0; - - if (family != 6) - return 0; - - switch (model) { - /* Nehalem compatible, but do not include turbo-ratio limit support */ - case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ - case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */ - return 0; - default: - return 1; - } -} - -int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (has_slv_msrs(family, model)) - return 1; - - return 0; -} - -int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ - case INTEL_FAM6_HASWELL_X: /* HSW Xeon */ - return 1; - default: - return 0; - } -} - -int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_HASWELL_X: /* HSW Xeon */ - return 1; - default: - return 0; - } -} - -int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ - return 1; - default: - return 0; - } -} - -int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_SAPPHIRERAPIDS_X: - return 1; - default: - return 0; - } -} - -int has_config_tdp(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_IVYBRIDGE: /* IVB */ - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_X: /* HSX */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_BROADWELL_G: /* BDW */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_ICELAKE_X: /* ICX */ - case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ - case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ - return 1; - default: - return 0; - } -} - -/* - * tcc_offset_bits: - * 0: Tcc Offset not supported (Default) - * 6: Bit 29:24 of MSR_PLATFORM_INFO - * 4: Bit 27:24 of MSR_PLATFORM_INFO - */ -void check_tcc_offset(int model) -{ - unsigned long long msr; - - if (!genuine_intel) - return; - - switch (model) { - case INTEL_FAM6_SKYLAKE_L: - case INTEL_FAM6_SKYLAKE: - case INTEL_FAM6_KABYLAKE_L: - case INTEL_FAM6_KABYLAKE: - case INTEL_FAM6_ICELAKE_L: - case INTEL_FAM6_ICELAKE: - case INTEL_FAM6_TIGERLAKE_L: - case INTEL_FAM6_TIGERLAKE: - case INTEL_FAM6_COMETLAKE: - if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) { - msr = (msr >> 30) & 1; - if (msr) - tcc_offset_bits = 6; - } - return; - default: - return; - } -} - -static void remove_underbar(char *s) -{ - char *to = s; - - while (*s) { - if (*s != '_') - *to++ = *s; - s++; - } - - *to = 0; -} - -static void dump_turbo_ratio_info(unsigned int family, unsigned int model) -{ - if (!has_turbo) - return; - - if (has_hsw_turbo_ratio_limit(family, model)) - dump_hsw_turbo_ratio_limits(); - - if (has_ivt_turbo_ratio_limit(family, model)) - dump_ivt_turbo_ratio_limits(); - - if (has_turbo_ratio_limit(family, model)) { - dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model); - - if (is_hybrid) - dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model); - } - - if (has_atom_turbo_ratio_limit(family, model)) - dump_atom_turbo_ratio_limits(); - - if (has_knl_turbo_ratio_limit(family, model)) - dump_knl_turbo_ratio_limits(); - - if (has_config_tdp(family, model)) - dump_config_tdp(); -} - -static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model) -{ - if (!do_nhm_platform_info) - return; - - dump_nhm_platform_info(); - dump_turbo_ratio_info(family, model); - dump_nhm_cst_cfg(); -} - -static int read_sysfs_int(char *path) -{ - FILE *input; - int retval = -1; - - input = fopen(path, "r"); - if (input == NULL) { - if (debug) - fprintf(outf, "NSFOD %s\n", path); - return (-1); - } - if (fscanf(input, "%d", &retval) != 1) - err(1, "%s: failed to read int from file", path); - fclose(input); - - return (retval); -} - -static void dump_sysfs_file(char *path) -{ - FILE *input; - char cpuidle_buf[64]; - - input = fopen(path, "r"); - if (input == NULL) { - if (debug) - fprintf(outf, "NSFOD %s\n", path); - return; - } - if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) - err(1, "%s: failed to read file", path); - fclose(input); - - fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); -} - -static void intel_uncore_frequency_probe(void) -{ - int i, j; - char path[128]; - - if (!genuine_intel) - return; - - if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK)) - return; - - if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK)) - BIC_PRESENT(BIC_UNCORE_MHZ); - - if (quiet) - return; + if (quiet) + return; for (i = 0; i < topo.num_packages; ++i) { for (j = 0; j < topo.num_die; ++j) { @@ -4194,6 +4610,20 @@ static void intel_uncore_frequency_probe(void) } } +static void probe_graphics(void) +{ + if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) + BIC_PRESENT(BIC_GFX_rc6); + + if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) || + !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXMHz); + + if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) || + !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXACTMHz); +} + static void dump_sysfs_cstate_config(void) { char path[64]; @@ -4310,7 +4740,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; /* EPB is per-package */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; if (cpu_migrate(cpu)) { @@ -4359,7 +4789,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; /* MSR_HWP_CAPABILITIES is per-package */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; if (cpu_migrate(cpu)) { @@ -4442,7 +4872,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data cpu = t->cpu_id; /* per-package */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; if (cpu_migrate(cpu)) { @@ -4450,7 +4880,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data return -1; } - if (do_core_perf_limit_reasons) { + if (platform->plr_msrs & PLR_CORE) { get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", @@ -4483,7 +4913,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); } - if (do_gfx_perf_limit_reasons) { + if (platform->plr_msrs & PLR_GFX) { get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", @@ -4503,7 +4933,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 25) ? "GFXPwr, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); } - if (do_ring_perf_limit_reasons) { + if (platform->plr_msrs & PLR_RING) { get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(outf, " (Active: %s%s%s%s%s%s)", @@ -4525,208 +4955,74 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp_intel(unsigned int model) +double get_quirk_tdp(void) { - unsigned long long msr; - - if (do_rapl & RAPL_PKG_POWER_INFO) - if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) - return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; + if (platform->rapl_quirk_tdp) + return platform->rapl_quirk_tdp; - switch (model) { - case INTEL_FAM6_ATOM_SILVERMONT: - case INTEL_FAM6_ATOM_SILVERMONT_D: - return 30.0; - default: - return 135.0; - } + return 135.0; } -double get_tdp_amd(unsigned int family) +double get_tdp_intel(void) { - UNUSED(family); + unsigned long long msr; - /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ - return 280.0; + if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) + if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) + return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; + return get_quirk_tdp(); } -/* - * rapl_dram_energy_units_probe() - * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. - */ -static double rapl_dram_energy_units_probe(int model, double rapl_energy_units) +double get_tdp_amd(void) { - /* only called for genuine_intel, family 6 */ - - switch (model) { - case INTEL_FAM6_HASWELL_X: /* HSX */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ - case INTEL_FAM6_ICELAKE_X: /* ICX */ - return (rapl_dram_energy_units = 15.3 / 1000000); - default: - return (rapl_energy_units); - } + return get_quirk_tdp(); } -void rapl_probe_intel(unsigned int family, unsigned int model) +void rapl_probe_intel(void) { unsigned long long msr; unsigned int time_unit; double tdp; - if (family != 6) - return; - - switch (model) { - case INTEL_FAM6_SANDYBRIDGE: - case INTEL_FAM6_IVYBRIDGE: - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_BROADWELL_G: /* BDW */ - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - BIC_PRESENT(BIC_GFX_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - BIC_PRESENT(BIC_GFXWatt); - } - break; - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; - if (rapl_joules) - BIC_PRESENT(BIC_Pkg_J); - else - BIC_PRESENT(BIC_PkgWatt); - break; - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - do_rapl = - RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS - | RAPL_GFX | RAPL_PKG_POWER_INFO; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - BIC_PRESENT(BIC_RAM_J); - BIC_PRESENT(BIC_GFX_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - BIC_PRESENT(BIC_RAMWatt); - BIC_PRESENT(BIC_GFXWatt); - } - break; - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ - do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; - BIC_PRESENT(BIC_PKG__); - if (rapl_joules) - BIC_PRESENT(BIC_Pkg_J); - else - BIC_PRESENT(BIC_PkgWatt); - break; - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - do_rapl = - RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS - | RAPL_GFX | RAPL_PKG_POWER_INFO; - BIC_PRESENT(BIC_PKG__); - BIC_PRESENT(BIC_RAM__); - if (rapl_joules) { + if (rapl_joules) { + if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS) BIC_PRESENT(BIC_Pkg_J); + if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) BIC_PRESENT(BIC_Cor_J); + if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS) BIC_PRESENT(BIC_RAM_J); + if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS) BIC_PRESENT(BIC_GFX_J); - } else { + } else { + if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS) BIC_PRESENT(BIC_PkgWatt); + if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) BIC_PRESENT(BIC_CorWatt); + if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS) BIC_PRESENT(BIC_RAMWatt); + if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS) BIC_PRESENT(BIC_GFXWatt); - } - break; - case INTEL_FAM6_HASWELL_X: /* HSX */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_ICELAKE_X: /* ICX */ - case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ - case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ - do_rapl = - RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | - RAPL_PKG_POWER_INFO; + } + + if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) BIC_PRESENT(BIC_PKG__); + if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) BIC_PRESENT(BIC_RAM__); - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_RAM_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_RAMWatt); - } - break; - case INTEL_FAM6_SANDYBRIDGE_X: - case INTEL_FAM6_IVYBRIDGE_X: - do_rapl = - RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | - RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; - BIC_PRESENT(BIC_PKG__); - BIC_PRESENT(BIC_RAM__); - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - BIC_PRESENT(BIC_RAM_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - BIC_PRESENT(BIC_RAMWatt); - } - break; - case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ - case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ - do_rapl = RAPL_PKG | RAPL_CORES; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - } - break; - case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - do_rapl = - RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | - RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; - BIC_PRESENT(BIC_PKG__); - BIC_PRESENT(BIC_RAM__); - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - BIC_PRESENT(BIC_RAM_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - BIC_PRESENT(BIC_RAMWatt); - } - break; - default: - return; - } /* units on package 0, verify later other packages match */ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) return; rapl_power_units = 1.0 / (1 << (msr & 0xF)); - if (model == INTEL_FAM6_ATOM_SILVERMONT) + if (platform->has_rapl_divisor) rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; else rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); - rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units); + if (platform->has_fixed_rapl_unit) + rapl_dram_energy_units = (15.3 / 1000000); + else + rapl_dram_energy_units = rapl_energy_units; time_unit = msr >> 16 & 0xF; if (time_unit == 0) @@ -4734,32 +5030,18 @@ void rapl_probe_intel(unsigned int family, unsigned int model) rapl_time_units = 1.0 / (1 << (time_unit)); - tdp = get_tdp_intel(model); + tdp = get_tdp_intel(); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (!quiet) fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); } -void rapl_probe_amd(unsigned int family, unsigned int model) +void rapl_probe_amd(void) { unsigned long long msr; - unsigned int eax, ebx, ecx, edx; - unsigned int has_rapl = 0; double tdp; - UNUSED(model); - - if (max_extended_level >= 0x80000007) { - __cpuid(0x80000007, eax, ebx, ecx, edx); - /* RAPL (Fam 17h+) */ - has_rapl = edx & (1 << 14); - } - - if (!has_rapl || family < 0x17) - return; - - do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; if (rapl_joules) { BIC_PRESENT(BIC_Pkg_J); BIC_PRESENT(BIC_Cor_J); @@ -4775,128 +5057,13 @@ void rapl_probe_amd(unsigned int family, unsigned int model) rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); rapl_power_units = ldexp(1.0, -(msr & 0xf)); - tdp = get_tdp_amd(family); + tdp = get_tdp_amd(); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (!quiet) fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); } -/* - * rapl_probe() - * - * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units - */ -void rapl_probe(unsigned int family, unsigned int model) -{ - if (genuine_intel) - rapl_probe_intel(family, model); - if (authentic_amd || hygon_genuine) - rapl_probe_amd(family, model); -} - -void perf_limit_reasons_probe(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return; - - if (family != 6) - return; - - switch (model) { - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - do_gfx_perf_limit_reasons = 1; - /* FALLTHRU */ - case INTEL_FAM6_HASWELL_X: /* HSX */ - do_core_perf_limit_reasons = 1; - do_ring_perf_limit_reasons = 1; - default: - return; - } -} - -void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) -{ - if (family != 6) - return; - - switch (model) { - case INTEL_FAM6_BROADWELL_X: - case INTEL_FAM6_SKYLAKE_X: - has_automatic_cstate_conversion = 1; - } -} - -void prewake_cstate_probe(unsigned int family, unsigned int model) -{ - if (is_icx(family, model) || is_spr(family, model)) - dis_cstate_prewake = 1; -} - -int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) -{ - unsigned long long msr; - unsigned int dts, dts2; - int cpu; - - UNUSED(c); - UNUSED(p); - - if (!(do_dts || do_ptm)) - return 0; - - cpu = t->cpu_id; - - /* DTS is per-core, no need to print for each thread */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) - return 0; - - if (cpu_migrate(cpu)) { - fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); - return -1; - } - - if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { - if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) - return 0; - - dts = (msr >> 16) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); - - if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) - return 0; - - dts = (msr >> 16) & 0x7F; - dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tj_max - dts, tj_max - dts2); - } - - if (do_dts && debug) { - unsigned int resolution; - - if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) - return 0; - - dts = (msr >> 16) & 0x7F; - resolution = (msr >> 27) & 0xF; - fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", - cpu, msr, tj_max - dts, resolution); - - if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) - return 0; - - dts = (msr >> 16) & 0x7F; - dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tj_max - dts, tj_max - dts2); - } - - return 0; -} - void print_power_limit_msr(int cpu, unsigned long long msr, char *label) { fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", @@ -4918,11 +5085,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) UNUSED(c); UNUSED(p); - if (!do_rapl) + if (!platform->rapl_msrs) return 0; /* RAPL counters are per package, so print only for 1st thread/package */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; cpu = t->cpu_id; @@ -4931,7 +5098,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; } - if (do_rapl & RAPL_AMD_F17H) { + if (platform->rapl_msrs & RAPL_AMD_F17H) { msr_name = "MSR_RAPL_PWR_UNIT"; if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) return -1; @@ -4944,7 +5111,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units); - if (do_rapl & RAPL_PKG_POWER_INFO) { + if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) return -5; @@ -4957,7 +5124,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (do_rapl & RAPL_PKG) { + if (platform->rapl_msrs & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) return -9; @@ -4981,7 +5148,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); } - if (do_rapl & RAPL_DRAM_POWER_INFO) { + if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; @@ -4992,7 +5159,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (do_rapl & RAPL_DRAM) { + if (platform->rapl_msrs & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", @@ -5000,20 +5167,20 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) print_power_limit_msr(cpu, msr, "DRAM Limit"); } - if (do_rapl & RAPL_CORE_POLICY) { + if (platform->rapl_msrs & RAPL_CORE_POLICY) { if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } - if (do_rapl & RAPL_CORES_POWER_LIMIT) { + if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "Cores Limit"); } - if (do_rapl & RAPL_GFX) { + if (platform->rapl_msrs & RAPL_GFX) { if (get_msr(cpu, MSR_PP1_POLICY, &msr)) return -8; @@ -5029,217 +5196,24 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) } /* - * SNB adds support for additional MSRs: - * - * MSR_PKG_C7_RESIDENCY 0x000003fa - * MSR_CORE_C7_RESIDENCY 0x000003fe - * MSR_PKG_C2_RESIDENCY 0x0000060d - */ - -int has_snb_msrs(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_SANDYBRIDGE: - case INTEL_FAM6_SANDYBRIDGE_X: - case INTEL_FAM6_IVYBRIDGE: /* IVB */ - case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_X: /* HSW */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_BROADWELL_G: /* BDW */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_ICELAKE_X: /* ICX */ - case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ - return 1; - } - return 0; -} - -/* - * HSW ULT added support for C8/C9/C10 MSRs: - * - * MSR_PKG_C8_RESIDENCY 0x00000630 - * MSR_PKG_C9_RESIDENCY 0x00000631 - * MSR_PKG_C10_RESIDENCY 0x00000632 - * - * MSR_PKGC8_IRTL 0x00000633 - * MSR_PKGC9_IRTL 0x00000634 - * MSR_PKGC10_IRTL 0x00000635 - * - */ -int has_c8910_msrs(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - return 1; - } - return 0; -} - -/* - * SKL adds support for additional MSRS: + * probe_rapl() * - * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 - * MSR_PKG_ANY_CORE_C0_RES 0x00000659 - * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A - * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + * sets rapl_power_units, rapl_energy_units, rapl_time_units */ -int has_skl_msrs(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - return 1; - } - return 0; -} - -int is_slm(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ - case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ - return 1; - } - return 0; -} - -int is_knl(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ - return 1; - } - return 0; -} - -int is_cnl(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - if (family != 6) - return 0; - - switch (model) { - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - return 1; - } - - return 0; -} - -unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) -{ - if (is_knl(family, model)) - return 1024; - return 1; -} - -#define SLM_BCLK_FREQS 5 -double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; - -double slm_bclk(void) -{ - unsigned long long msr = 3; - unsigned int i; - double freq; - - if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) - fprintf(outf, "SLM BCLK: unknown\n"); - - i = msr & 0xf; - if (i >= SLM_BCLK_FREQS) { - fprintf(outf, "SLM BCLK[%d] invalid\n", i); - i = 3; - } - freq = slm_freq_table[i]; - - if (!quiet) - fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); - - return freq; -} - -double discover_bclk(unsigned int family, unsigned int model) +void probe_rapl(void) { - if (has_snb_msrs(family, model) || is_knl(family, model)) - return 100.00; - else if (is_slm(family, model)) - return slm_bclk(); - else - return 133.33; -} - -int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) -{ - unsigned int eax, ebx, ecx, edx; - - UNUSED(c); - UNUSED(p); - - if (!genuine_intel) - return 0; + if (!platform->rapl_msrs) + return; - if (cpu_migrate(t->cpu_id)) { - fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); - return -1; - } + if (genuine_intel) + rapl_probe_intel(); + if (authentic_amd || hygon_genuine) + rapl_probe_amd(); - if (max_level < 0x1a) - return 0; + if (quiet) + return; - __cpuid(0x1a, eax, ebx, ecx, edx); - eax = (eax >> 24) & 0xFF; - if (eax == 0x20) - t->is_atom = true; - return 0; + for_all_cpus(print_rapl, ODD_COUNTERS); } /* @@ -5268,7 +5242,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk return 0; /* this is a per-package concept */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + if (!is_cpu_first_thread_in_package(t, c, p)) return 0; cpu = t->cpu_id; @@ -5284,7 +5258,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk } /* Temperature Target MSR is Nehalem and newer only */ - if (!do_nhm_platform_info) + if (!platform->has_nhm_msrs) goto guess; if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) @@ -5293,34 +5267,134 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk tcc_default = (msr >> 16) & 0xFF; if (!quiet) { - switch (tcc_offset_bits) { - case 4: - tcc_offset = (msr >> 24) & 0xF; - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", - cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); - break; - case 6: - tcc_offset = (msr >> 24) & 0x3F; + int bits = platform->tcc_offset_bits; + unsigned long long enabled = 0; + + if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled)) + enabled = (enabled >> 30) & 1; + + if (bits && enabled) { + tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0); fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); - break; - default: + } else { fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); - break; } } - if (!tcc_default) - goto guess; + if (!tcc_default) + goto guess; + + tj_max = tcc_default; + + return 0; + +guess: + tj_max = TJMAX_DEFAULT; + fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); + + return 0; +} + +int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + unsigned int dts, dts2; + int cpu; + + UNUSED(c); + UNUSED(p); + + if (!(do_dts || do_ptm)) + return 0; + + cpu = t->cpu_id; + + /* DTS is per-core, no need to print for each thread */ + if (!is_cpu_first_thread_in_core(t, c, p)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); + + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tj_max - dts, tj_max - dts2); + } + + if (do_dts && debug) { + unsigned int resolution; + + if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + resolution = (msr >> 27) & 0xF; + fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", + cpu, msr, tj_max - dts, resolution); + + if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tj_max - dts, tj_max - dts2); + } + + return 0; +} + +void probe_thermal(void) +{ + if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) + BIC_PRESENT(BIC_CORE_THROT_CNT); + else + BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); + + for_all_cpus(set_temperature_target, ODD_COUNTERS); + + if (quiet) + return; + + for_all_cpus(print_thermal, ODD_COUNTERS); +} + +int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned int eax, ebx, ecx, edx; + + UNUSED(c); + UNUSED(p); - tj_max = tcc_default; + if (!genuine_intel) + return 0; - return 0; + if (cpu_migrate(t->cpu_id)) { + fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); + return -1; + } -guess: - tj_max = TJMAX_DEFAULT; - fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); + if (max_level < 0x1a) + return 0; + __cpuid(0x1a, eax, ebx, ecx, edx); + eax = (eax >> 24) & 0xFF; + if (eax == 0x20) + t->is_atom = true; return 0; } @@ -5354,7 +5428,7 @@ void decode_misc_feature_control(void) { unsigned long long msr; - if (!has_misc_feature_control) + if (!platform->has_msr_misc_feature_control) return; if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) @@ -5375,10 +5449,7 @@ void decode_misc_pwr_mgmt_msr(void) { unsigned long long msr; - if (!do_nhm_platform_info) - return; - - if (no_MSR_MISC_PWR_MGMT) + if (!platform->has_msr_misc_pwr_mgmt) return; if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) @@ -5397,6 +5468,9 @@ void decode_c6_demotion_policy_msr(void) { unsigned long long msr; + if (!platform->has_msr_c6_demotion_policy_config) + return; + if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); @@ -5406,67 +5480,6 @@ void decode_c6_demotion_policy_msr(void) base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); } -/* - * When models are the same, for the purpose of turbostat, reuse - */ -unsigned int intel_model_duplicates(unsigned int model) -{ - - switch (model) { - case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ - case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ - case 0x1F: /* Core i7 and i5 Processor - Nehalem */ - case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */ - case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */ - return INTEL_FAM6_NEHALEM; - - case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ - case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */ - return INTEL_FAM6_NEHALEM_EX; - - case INTEL_FAM6_XEON_PHI_KNM: - return INTEL_FAM6_XEON_PHI_KNL; - - case INTEL_FAM6_BROADWELL_X: - case INTEL_FAM6_BROADWELL_D: /* BDX-DE */ - return INTEL_FAM6_BROADWELL_X; - - case INTEL_FAM6_SKYLAKE_L: - case INTEL_FAM6_SKYLAKE: - case INTEL_FAM6_KABYLAKE_L: - case INTEL_FAM6_KABYLAKE: - case INTEL_FAM6_COMETLAKE_L: - case INTEL_FAM6_COMETLAKE: - return INTEL_FAM6_SKYLAKE_L; - - case INTEL_FAM6_ICELAKE_L: - case INTEL_FAM6_ICELAKE_NNPI: - case INTEL_FAM6_TIGERLAKE_L: - case INTEL_FAM6_TIGERLAKE: - case INTEL_FAM6_ROCKETLAKE: - case INTEL_FAM6_LAKEFIELD: - case INTEL_FAM6_ALDERLAKE: - case INTEL_FAM6_ALDERLAKE_L: - case INTEL_FAM6_ATOM_GRACEMONT: - case INTEL_FAM6_RAPTORLAKE: - case INTEL_FAM6_RAPTORLAKE_P: - case INTEL_FAM6_RAPTORLAKE_S: - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: - return INTEL_FAM6_CANNONLAKE_L; - - case INTEL_FAM6_ATOM_TREMONT_L: - return INTEL_FAM6_ATOM_TREMONT; - - case INTEL_FAM6_ICELAKE_D: - return INTEL_FAM6_ICELAKE_X; - - case INTEL_FAM6_EMERALDRAPIDS_X: - return INTEL_FAM6_SAPPHIRERAPIDS_X; - } - return model; -} - void print_dev_latency(void) { char *path = "/dev/cpu_dma_latency"; @@ -5510,6 +5523,101 @@ void linux_perf_init(void) BIC_PRESENT(BIC_IPC); } +void probe_cstates(void) +{ + probe_cst_limit(); + + if (platform->supported_cstates & CC1) + BIC_PRESENT(BIC_CPU_c1); + + if (platform->supported_cstates & CC3) + BIC_PRESENT(BIC_CPU_c3); + + if (platform->supported_cstates & CC6) + BIC_PRESENT(BIC_CPU_c6); + + if (platform->supported_cstates & CC7) + BIC_PRESENT(BIC_CPU_c7); + + if (platform->supported_cstates & PC2 && (pkg_cstate_limit >= PCL__2)) + BIC_PRESENT(BIC_Pkgpc2); + + if (platform->supported_cstates & PC3 && (pkg_cstate_limit >= PCL__3)) + BIC_PRESENT(BIC_Pkgpc3); + + if (platform->supported_cstates & PC6 && (pkg_cstate_limit >= PCL__6)) + BIC_PRESENT(BIC_Pkgpc6); + + if (platform->supported_cstates & PC7 && (pkg_cstate_limit >= PCL__7)) + BIC_PRESENT(BIC_Pkgpc7); + + if (platform->supported_cstates & PC8 && (pkg_cstate_limit >= PCL__8)) + BIC_PRESENT(BIC_Pkgpc8); + + if (platform->supported_cstates & PC9 && (pkg_cstate_limit >= PCL__9)) + BIC_PRESENT(BIC_Pkgpc9); + + if (platform->supported_cstates & PC10 && (pkg_cstate_limit >= PCL_10)) + BIC_PRESENT(BIC_Pkgpc10); + + if (platform->has_msr_module_c6_res_ms) + BIC_PRESENT(BIC_Mod_c6); + + if (platform->has_ext_cst_msrs) { + BIC_PRESENT(BIC_Totl_c0); + BIC_PRESENT(BIC_Any_c0); + BIC_PRESENT(BIC_GFX_c0); + BIC_PRESENT(BIC_CPUGFX); + } + + if (quiet) + return; + + dump_power_ctl(); + dump_cst_cfg(); + decode_c6_demotion_policy_msr(); + print_dev_latency(); + dump_sysfs_cstate_config(); + print_irtl(); +} + +void probe_lpi(void) +{ + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) + BIC_PRESENT(BIC_CPU_LPI); + else + BIC_NOT_PRESENT(BIC_CPU_LPI); + + if (!access(sys_lpi_file_sysfs, R_OK)) { + sys_lpi_file = sys_lpi_file_sysfs; + BIC_PRESENT(BIC_SYS_LPI); + } else if (!access(sys_lpi_file_debugfs, R_OK)) { + sys_lpi_file = sys_lpi_file_debugfs; + BIC_PRESENT(BIC_SYS_LPI); + } else { + sys_lpi_file_sysfs = NULL; + BIC_NOT_PRESENT(BIC_SYS_LPI); + } + +} + +void probe_pstates(void) +{ + probe_bclk(); + + if (quiet) + return; + + dump_platform_info(); + dump_turbo_ratio_info(); + dump_sysfs_pstate_config(); + decode_misc_pwr_mgmt_msr(); + + for_all_cpus(print_hwp, ODD_COUNTERS); + for_all_cpus(print_epb, ODD_COUNTERS); + for_all_cpus(print_perf_limit, ODD_COUNTERS); +} + void process_cpuid() { unsigned int eax, ebx, ecx, edx; @@ -5569,10 +5677,8 @@ void process_cpuid() edx_flags & (1 << 22) ? "ACPI-TM" : "-", edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); } - if (genuine_intel) { - model_orig = model; - model = intel_model_duplicates(model); - } + + probe_platform_features(family, model); if (!(edx_flags & (1 << 5))) errx(1, "CPUID: no MSR"); @@ -5656,26 +5762,12 @@ void process_cpuid() __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); if (ebx_tsc != 0) { - if (!quiet && (ebx != 0)) fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); if (crystal_hz == 0) - switch (model) { - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - crystal_hz = 24000000; /* 24.0 MHz */ - break; - case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - crystal_hz = 25000000; /* 25.0 MHz */ - break; - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - crystal_hz = 19200000; /* 19.2 MHz */ - break; - default: - crystal_hz = 0; - } + crystal_hz = platform->crystal_freq; if (crystal_hz) { tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; @@ -5700,147 +5792,33 @@ void process_cpuid() } if (has_aperf) - aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); + aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; BIC_PRESENT(BIC_IRQ); BIC_PRESENT(BIC_TSC_MHz); +} - if (probe_nhm_msrs(family, model)) { - do_nhm_platform_info = 1; - BIC_PRESENT(BIC_CPU_c1); - BIC_PRESENT(BIC_CPU_c3); - BIC_PRESENT(BIC_CPU_c6); - BIC_PRESENT(BIC_SMI); - } - do_snb_cstates = has_snb_msrs(family, model); - - if (do_snb_cstates) - BIC_PRESENT(BIC_CPU_c7); - - do_irtl_snb = has_snb_msrs(family, model); - if (do_snb_cstates && (pkg_cstate_limit >= PCL__2)) - BIC_PRESENT(BIC_Pkgpc2); - if (pkg_cstate_limit >= PCL__3) - BIC_PRESENT(BIC_Pkgpc3); - if (pkg_cstate_limit >= PCL__6) - BIC_PRESENT(BIC_Pkgpc6); - if (do_snb_cstates && (pkg_cstate_limit >= PCL__7)) - BIC_PRESENT(BIC_Pkgpc7); - if (has_slv_msrs(family, model)) { - BIC_NOT_PRESENT(BIC_Pkgpc2); - BIC_NOT_PRESENT(BIC_Pkgpc3); - BIC_PRESENT(BIC_Pkgpc6); - BIC_NOT_PRESENT(BIC_Pkgpc7); - BIC_PRESENT(BIC_Mod_c6); - use_c1_residency_msr = 1; - } - if (is_jvl(family, model)) { - BIC_NOT_PRESENT(BIC_CPU_c3); - BIC_NOT_PRESENT(BIC_CPU_c7); - BIC_NOT_PRESENT(BIC_Pkgpc2); - BIC_NOT_PRESENT(BIC_Pkgpc3); - BIC_NOT_PRESENT(BIC_Pkgpc6); - BIC_NOT_PRESENT(BIC_Pkgpc7); - } - if (is_dnv(family, model)) { - BIC_PRESENT(BIC_CPU_c1); - BIC_NOT_PRESENT(BIC_CPU_c3); - BIC_NOT_PRESENT(BIC_Pkgpc3); - BIC_NOT_PRESENT(BIC_CPU_c7); - BIC_NOT_PRESENT(BIC_Pkgpc7); - use_c1_residency_msr = 1; - } - if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) { - BIC_NOT_PRESENT(BIC_CPU_c3); - BIC_NOT_PRESENT(BIC_Pkgpc3); - BIC_NOT_PRESENT(BIC_CPU_c7); - BIC_NOT_PRESENT(BIC_Pkgpc7); - } - if (is_bdx(family, model)) { - BIC_NOT_PRESENT(BIC_CPU_c7); - BIC_NOT_PRESENT(BIC_Pkgpc7); - } - if (has_c8910_msrs(family, model)) { - if (pkg_cstate_limit >= PCL__8) - BIC_PRESENT(BIC_Pkgpc8); - if (pkg_cstate_limit >= PCL__9) - BIC_PRESENT(BIC_Pkgpc9); - if (pkg_cstate_limit >= PCL_10) - BIC_PRESENT(BIC_Pkgpc10); - } - do_irtl_hsw = has_c8910_msrs(family, model); - if (has_skl_msrs(family, model)) { - BIC_PRESENT(BIC_Totl_c0); - BIC_PRESENT(BIC_Any_c0); - BIC_PRESENT(BIC_GFX_c0); - BIC_PRESENT(BIC_CPUGFX); - } - do_slm_cstates = is_slm(family, model); - do_knl_cstates = is_knl(family, model); - - if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model)) - BIC_NOT_PRESENT(BIC_CPU_c3); - - if (!quiet) - decode_misc_pwr_mgmt_msr(); - - if (!quiet && has_slv_msrs(family, model)) - decode_c6_demotion_policy_msr(); - - rapl_probe(family, model); - perf_limit_reasons_probe(family, model); - automatic_cstate_conversion_probe(family, model); - - check_tcc_offset(model_orig); - - if (!quiet) - dump_cstate_pstate_config_info(family, model); - intel_uncore_frequency_probe(); - - if (!quiet) - print_dev_latency(); - if (!quiet) - dump_sysfs_cstate_config(); - if (!quiet) - dump_sysfs_pstate_config(); +void probe_pm_features(void) +{ + probe_pstates(); - if (has_skl_msrs(family, model) || is_ehl(family, model)) - calculate_tsc_tweak(); + probe_cstates(); - if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) - BIC_PRESENT(BIC_GFX_rc6); + probe_lpi(); - if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) - BIC_PRESENT(BIC_GFXMHz); + probe_intel_uncore_frequency(); - if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) - BIC_PRESENT(BIC_GFXACTMHz); + probe_graphics(); - if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) - BIC_PRESENT(BIC_CPU_LPI); - else - BIC_NOT_PRESENT(BIC_CPU_LPI); + probe_rapl(); - if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) - BIC_PRESENT(BIC_CORE_THROT_CNT); - else - BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); + probe_thermal(); - if (!access(sys_lpi_file_sysfs, R_OK)) { - sys_lpi_file = sys_lpi_file_sysfs; - BIC_PRESENT(BIC_SYS_LPI); - } else if (!access(sys_lpi_file_debugfs, R_OK)) { - sys_lpi_file = sys_lpi_file_debugfs; - BIC_PRESENT(BIC_SYS_LPI); - } else { - sys_lpi_file_sysfs = NULL; - BIC_NOT_PRESENT(BIC_SYS_LPI); - } + if (platform->has_nhm_msrs) + BIC_PRESENT(BIC_SMI); if (!quiet) decode_misc_feature_control(); - - return; } /* @@ -5855,7 +5833,7 @@ int dir_filter(const struct dirent *dirp) return 0; } -void topology_probe() +void topology_probe(bool startup) { int i; int max_core_id = 0; @@ -5888,14 +5866,62 @@ void topology_probe() for_all_proc_cpus(mark_cpu_present); /* - * Validate that all cpus in cpu_subset are also in cpu_present_set + * Allocate and initialize cpu_effective_set + */ + cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_effective_set == NULL) + err(3, "CPU_ALLOC"); + cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set); + update_effective_set(startup); + + /* + * Allocate and initialize cpu_allowed_set + */ + cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_allowed_set == NULL) + err(3, "CPU_ALLOC"); + cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set); + + /* + * Validate and update cpu_allowed_set. + * + * Make sure all cpus in cpu_subset are also in cpu_present_set during startup. + * Give a warning when cpus in cpu_subset become unavailable at runtime. + * Give a warning when cpus are not effective because of cgroup setting. + * + * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset. */ for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { - if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) - if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) - err(1, "cpu%d not present", i); + if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) + continue; + + if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) { + if (cpu_subset) { + /* cpus in cpu_subset must be in cpu_present_set during startup */ + if (startup) + err(1, "cpu%d not present", i); + else + fprintf(stderr, "cpu%d not present\n", i); + } + continue; + } + + if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) { + if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) { + fprintf(stderr, "cpu%d not effective\n", i); + continue; + } + } + + CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); } + if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) + err(-ENODEV, "No valid cpus found"); + sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set); + /* * Allocate and initialize cpu_affinity_set */ @@ -6009,15 +6035,19 @@ void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_ if (*c == NULL) goto error; - for (i = 0; i < num_cores; i++) + for (i = 0; i < num_cores; i++) { (*c)[i].core_id = -1; + (*c)[i].base_cpu = -1; + } *p = calloc(topo.num_packages, sizeof(struct pkg_data)); if (*p == NULL) goto error; - for (i = 0; i < topo.num_packages; i++) + for (i = 0; i < topo.num_packages; i++) { (*p)[i].package_id = i; + (*p)[i].base_cpu = -1; + } return; error: @@ -6050,10 +6080,11 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, p = GET_PKG(pkg_base, pkg_id); t->cpu_id = cpu_id; - if (thread_id == 0) { - t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; - if (cpu_is_first_core_in_package(cpu_id)) - t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; + if (!cpu_is_not_allowed(cpu_id)) { + if (c->base_cpu < 0) + c->base_cpu = t->cpu_id; + if (p->base_cpu < 0) + p->base_cpu = t->cpu_id; } c->core_id = core_id; @@ -6093,59 +6124,64 @@ void allocate_irq_buffers(void) err(-1, "calloc %d", topo.max_cpu_num + 1); } -void setup_all_buffers(void) +int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + topo.allowed_cpus++; + if ((int)t->cpu_id == c->base_cpu) + topo.allowed_cores++; + if ((int)t->cpu_id == p->base_cpu) + topo.allowed_packages++; + + return 0; +} + +void topology_update(void) +{ + topo.allowed_cpus = 0; + topo.allowed_cores = 0; + topo.allowed_packages = 0; + for_all_cpus(update_topo, ODD_COUNTERS); +} +void setup_all_buffers(bool startup) { - topology_probe(); + topology_probe(startup); allocate_irq_buffers(); allocate_fd_percpu(); allocate_counters(&thread_even, &core_even, &package_even); allocate_counters(&thread_odd, &core_odd, &package_odd); allocate_output_buffer(); for_all_proc_cpus(initialize_counters); + topology_update(); } void set_base_cpu(void) { - base_cpu = sched_getcpu(); - if (base_cpu < 0) - err(-ENODEV, "No valid cpus found"); + int i; - if (debug > 1) - fprintf(outf, "base_cpu = %d\n", base_cpu); + for (i = 0; i < topo.max_cpu_num + 1; ++i) { + if (cpu_is_not_allowed(i)) + continue; + base_cpu = i; + if (debug > 1) + fprintf(outf, "base_cpu = %d\n", base_cpu); + return; + } + err(-ENODEV, "No valid cpus found"); } void turbostat_init() { - setup_all_buffers(); + setup_all_buffers(true); set_base_cpu(); check_dev_msr(); check_permissions(); process_cpuid(); + probe_pm_features(); linux_perf_init(); - if (!quiet) - for_all_cpus(print_hwp, ODD_COUNTERS); - - if (!quiet) - for_all_cpus(print_epb, ODD_COUNTERS); - - if (!quiet) - for_all_cpus(print_perf_limit, ODD_COUNTERS); - - if (!quiet) - for_all_cpus(print_rapl, ODD_COUNTERS); - - for_all_cpus(set_temperature_target, ODD_COUNTERS); - for_all_cpus(get_cpu_type, ODD_COUNTERS); for_all_cpus(get_cpu_type, EVEN_COUNTERS); - if (!quiet) - for_all_cpus(print_thermal, ODD_COUNTERS); - - if (!quiet && do_irtl_snb) - print_irtl(); - if (DO_BIC(BIC_IPC)) (void)get_instr_count_fd(base_cpu); } @@ -6160,8 +6196,6 @@ int fork_it(char **argv) first_counter_read = 0; if (status) exit(status); - /* clear affinity side-effect of get_counters() */ - sched_setaffinity(0, cpu_present_setsize, cpu_present_set); gettimeofday(&tv_even, (struct timezone *)NULL); child_pid = fork(); @@ -6225,7 +6259,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2023.03.17 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -6508,9 +6542,6 @@ void probe_sysfs(void) */ void parse_cpu_command(char *optarg) { - unsigned int start, end; - char *next; - if (!strcmp(optarg, "core")) { if (cpu_subset) goto error; @@ -6533,52 +6564,8 @@ void parse_cpu_command(char *optarg) CPU_ZERO_S(cpu_subset_size, cpu_subset); - next = optarg; - - while (next && *next) { - - if (*next == '-') /* no negative cpu numbers */ - goto error; - - start = strtoul(next, &next, 10); - - if (start >= CPU_SUBSET_MAXCPUS) - goto error; - CPU_SET_S(start, cpu_subset_size, cpu_subset); - - if (*next == '\0') - break; - - if (*next == ',') { - next += 1; - continue; - } - - if (*next == '-') { - next += 1; /* start range */ - } else if (*next == '.') { - next += 1; - if (*next == '.') - next += 1; /* start range */ - else - goto error; - } - - end = strtoul(next, &next, 10); - if (end <= start) - goto error; - - while (++start <= end) { - if (start >= CPU_SUBSET_MAXCPUS) - goto error; - CPU_SET_S(start, cpu_subset_size, cpu_subset); - } - - if (*next == ',') - next += 1; - else if (*next != '\0') - goto error; - } + if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size)) + goto error; return; @@ -6719,6 +6706,19 @@ void cmdline(int argc, char **argv) int main(int argc, char **argv) { + int fd, ret; + + fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); + if (fd < 0) + goto skip_cgroup_setting; + + ret = write(fd, "0\n", 2); + if (ret == -1) + perror("Can't update cgroup\n"); + + close(fd); + +skip_cgroup_setting: outf = stderr; cmdline(argc, argv); diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c index df7697b94007b12d8958738c1b38d444c10c5eff..c1f55e1d80a426f3b87c2f13a74176f19bb18e28 100644 --- a/tools/testing/selftests/bpf/progs/verifier_cfg.c +++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c @@ -97,4 +97,66 @@ l0_%=: r2 = r0; \ " ::: __clobber_all); } +SEC("socket") +__description("conditional loop (2)") +__success +__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11") +__naked void conditional_loop2(void) +{ + asm volatile (" \ + r9 = 2 ll; \ + r3 = 0x20 ll; \ + r4 = 0x35 ll; \ + r8 = r4; \ + goto l1_%=; \ +l0_%=: r9 -= r3; \ + r9 -= r4; \ + r9 -= r8; \ +l1_%=: r8 += r4; \ + if r8 < 0x64 goto l0_%=; \ + r0 = r9; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unconditional loop after conditional jump") +__failure __msg("infinite loop detected") +__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2") +__naked void uncond_loop_after_cond_jmp(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 > 0 goto l1_%=; \ +l0_%=: r0 = 1; \ + goto l0_%=; \ +l1_%=: exit; \ +" ::: __clobber_all); +} + + +__naked __noinline __used +static unsigned long never_ending_subprog() +{ + asm volatile (" \ + r0 = r1; \ + goto -1; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unconditional loop after conditional jump") +/* infinite loop is detected *after* check_cfg() */ +__failure __msg("infinite loop detected") +__naked void uncond_loop_in_subprog_after_cond_jmp(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 > 0 goto l1_%=; \ +l0_%=: r0 += 1; \ + call never_ending_subprog; \ +l1_%=: exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c index 5bc86af80a9ad4ea9fa238c6c087655d798e49da..71735dbf33d4f868e32197fe5e6e051da9fa87da 100644 --- a/tools/testing/selftests/bpf/progs/verifier_loops1.c +++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c @@ -75,9 +75,10 @@ l0_%=: r0 += 1; \ " ::: __clobber_all); } -SEC("tracepoint") +SEC("socket") __description("bounded loop, start in the middle") -__failure __msg("back-edge") +__success +__failure_unpriv __msg_unpriv("back-edge") __naked void loop_start_in_the_middle(void) { asm volatile (" \ @@ -136,7 +137,9 @@ l0_%=: exit; \ SEC("tracepoint") __description("bounded recursion") -__failure __msg("back-edge") +__failure +/* verifier limitation in detecting max stack depth */ +__msg("the call stack of 8 frames is too deep !") __naked void bounded_recursion(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 193c0f8272d056f562266d891cdfd14c47c011e3..6b564d4c09866a0752fb27e8927ecb5aa96f15a1 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -91,3 +91,43 @@ __naked int bpf_end_bswap(void) } #endif /* v4 instruction */ + +SEC("?raw_tp") +__success __log_level(2) +/* + * Without the bug fix there will be no history between "last_idx 3 first_idx 3" + * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor + * expected log messages to the one specific mark_chain_precision operation. + * + * This is quite fragile: if verifier checkpointing heuristic changes, this + * might need adjusting. + */ +__msg("2: (07) r0 += 1 ; R0_w=6") +__msg("3: (35) if r0 >= 0xa goto pc+1") +__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1") +__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4") +__msg("3: R0_w=6") +__naked int state_loop_first_last_equal(void) +{ + asm volatile ( + "r0 = 0;" + "l0_%=:" + "r0 += 1;" + "r0 += 1;" + /* every few iterations we'll have a checkpoint here with + * first_idx == last_idx, potentially confusing precision + * backtracking logic + */ + "if r0 >= 10 goto l1_%=;" /* checkpoint + mark_precise */ + "goto l0_%=;" + "l1_%=:" + "exit;" + ::: __clobber_common + ); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 1bdf2b43e49eaf31395ab93177e77b71be36ec2d..3d5cd51071f04725f4043d5aa83848c58602c9d5 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -442,7 +442,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge from insn 0 to 0", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -799,7 +799,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -811,7 +811,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c index f9297900cea6d44ecd64cf00f3f60099bb17a70a..78f19c255f20b466d4f022029217da87260a4800 100644 --- a/tools/testing/selftests/bpf/verifier/ld_imm64.c +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -9,8 +9,8 @@ BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { @@ -23,8 +23,8 @@ BPF_LD_IMM64(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 591ca9637b23e883cacea7293070a546824c2d81..b604c570309a7fb41c2edfd2f7f3164ec523f870 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) struct xdp_info *meta = data - sizeof(struct xdp_info); if (meta->count != pkt->pkt_nb) { - ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", - __func__, pkt->pkt_nb, meta->count); + ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n", + __func__, pkt->pkt_nb, + (unsigned long long)meta->count); return false; } @@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp if (addr >= umem->num_frames * umem->frame_size || addr + len > umem->num_frames * umem->frame_size) { - ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag invalid addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { - ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } @@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); ksft_print_msg("[%s] Too many packets completed\n", __func__); - ksft_print_msg("Last completion address: %llx\n", addr); + ksft_print_msg("Last completion address: %llx\n", + (unsigned long long)addr); return TEST_FAILURE; } @@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject) } if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { - ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, + ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n", + __func__, + (unsigned long long)stats.tx_invalid_descs, ifobject->xsk->pkt_stream->nb_pkts); return TEST_FAILURE; } diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index cc920c79ff1c32e9a27a714c29d255a95f189fd0..4ff10ea61461796385159ae8b6fd88ae183bbbcd 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -45,3 +45,4 @@ mdwe_test gup_longterm mkdirty va_high_addr_switch +hugetlb_fault_after_madv diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 0161fb49fc6ef1dbbc712f472f940ab959d5301d..befab43719badff842fd771eb52f54fbc98b510c 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -94,19 +94,19 @@ int init_uffd(void) uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); if (uffd == -1) - ksft_exit_fail_msg("uffd syscall failed\n"); + return uffd; uffdio_api.api = UFFD_API; uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC | UFFD_FEATURE_WP_HUGETLBFS_SHMEM; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) - ksft_exit_fail_msg("UFFDIO_API\n"); + return -1; if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) || !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) || !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) || !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) - ksft_exit_fail_msg("UFFDIO_API error %llu\n", uffdio_api.api); + return -1; return 0; } @@ -1151,7 +1151,7 @@ int sanity_tests(void) /* 9. Memory mapped file */ fd = open(__FILE__, O_RDONLY); if (fd < 0) - ksft_exit_fail_msg("%s Memory mapped file\n"); + ksft_exit_fail_msg("%s Memory mapped file\n", __func__); ret = stat(__FILE__, &sbuf); if (ret < 0) @@ -1159,7 +1159,7 @@ int sanity_tests(void) fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); tmp_buf = malloc(sbuf.st_size); memcpy(tmp_buf, fmem, sbuf.st_size); @@ -1189,7 +1189,7 @@ int sanity_tests(void) fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); wp_init(fmem, buf_size); wp_addr_range(fmem, buf_size); @@ -1479,6 +1479,10 @@ int main(void) struct stat sbuf; ksft_print_header(); + + if (init_uffd()) + return ksft_exit_pass(); + ksft_set_plan(115); page_size = getpagesize(); @@ -1488,9 +1492,6 @@ int main(void) if (pagemap_fd < 0) return -EINVAL; - if (init_uffd()) - ksft_exit_fail_msg("uffd init failed\n"); - /* 1. Sanity testing */ sanity_tests_sd(); @@ -1595,7 +1596,7 @@ int main(void) fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); wp_init(fmem, sbuf.st_size); wp_addr_range(fmem, sbuf.st_size); @@ -1623,7 +1624,7 @@ int main(void) fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) - ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno)); + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); wp_init(fmem, buf_size); wp_addr_range(fmem, buf_size); diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index cc16f6ca85333225004f06d5e7083700dda1d8c9..00757445278eda9f4e174aa176aa69b05dbeea5e 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -223,9 +223,12 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise +nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) # For this test, we need one and just one huge page echo 1 > /proc/sys/vm/nr_hugepages CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv +# Restore the previous number of huge pages, since further tests rely on it +echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages if test_selected "hugetlb"; then echo "NOTE: These hugetlb tests provide minimal coverage. Use" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 75a2438efdf3737c038f947075f3705ee6042d42..3c94f2f194d68188d8a0002779ba40c2ab0ef0eb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3240,7 +3240,7 @@ fastclose_tests() if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 + chk_join_nr 0 0 0 0 0 0 1 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 fi