    lockref: stop doing cpu_relax in the cmpxchg loop · f5fe24ef
    Mateusz Guzik authored
    
    On the x86-64 architecture, even a failing cmpxchg grants exclusive
    access to the cacheline, making it preferable to retry the failed op
    immediately instead of stalling with the pause instruction.
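
    To make the pattern concrete, here is a minimal user-space sketch of the
    kind of loop in question (illustrative only; C11 atomics stand in for the
    kernel's try_cmpxchg64_relaxed(), and "count"/"get_ref" are made-up names
    for this example):

      #include <stdatomic.h>

      static _Atomic unsigned long count;

      static void get_ref(void)
      {
              unsigned long old = atomic_load_explicit(&count,
                                                       memory_order_relaxed);

              /*
               * On failure the compare-exchange reloads 'old' with the
               * current value, so the retry always works on fresh data.
               * The old code executed cpu_relax() (the x86 "pause"
               * instruction) between attempts; the change is to simply
               * retry at once instead.
               */
              while (!atomic_compare_exchange_weak_explicit(&count, &old,
                              old + 1, memory_order_relaxed,
                              memory_order_relaxed))
                      ;       /* retry immediately, no pause */
      }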
    
    To illustrate the impact, below are benchmark results obtained by
    running various will-it-scale tests on top of the 6.2-rc3 kernel and
    Cascade Lake (2 sockets * 24 cores * 2 threads) CPU.
    
    All results in ops/s.  Note there is some variance in re-runs, but the
    code is consistently faster when contention is present.
    
      open3 ("Same file open/close"):
      proc          stock       no-pause
         1         805603         814942       (+1%)
         2        1054980        1054781       (-0%)
         8        1544802        1822858      (+18%)
        24        1191064        2199665      (+84%)
        48         851582        1469860      (+72%)
        96         609481        1427170     (+134%)
    
      fstat2 ("Same file fstat"):
      proc          stock       no-pause
         1        3013872        3047636       (+1%)
         2        4284687        4400421       (+2%)
         8        3257721        5530156      (+69%)
        24        2239819        5466127     (+144%)
        48        1701072        5256609     (+209%)
        96        1269157        6649326     (+423%)
    
    Additionally, benchmark results from a kernel with a private patch to
    help access() scalability:

      access2 ("Same file access"):
      proc          stock        patched      patched
                                             +nopause
        24        2378041        2005501      5370335  (-15% / +125%)
    
    That is, with the pause instruction in place, fixing the problems in
    access() itself *reduces* scalability: the cacheline ping-pong now
    happens only in lockref, where every failed cmpxchg stalls in pause.
    
    Note that fstat and access benchmarks are not currently integrated into
    will-it-scale, but interested parties can find them in pull requests to
    said project.
    
    The code at hand has a rather tortured history.  The first modification
    showed up in commit d472d9d9 ("lockref: Relax in cmpxchg loop"), written
    with Itanium in mind.  Later it was patched to use an arch-dependent
    macro so that s390, where the relaxation caused a significant regression,
    could opt out.  Said macro underwent several revisions and was ultimately
    eliminated, going back to cpu_relax().
    
    While I intended to only remove cpu_relax for x86-64, I got the
    following comment from Linus:
    
        I would actually prefer just removing it entirely and see if
        somebody else hollers. You have the numbers to prove it hurts on
        real hardware, and I don't think we have any numbers to the
        contrary.
    
        So I think it's better to trust the numbers and remove it as a
        failure, than say "let's just remove it on x86-64 and leave
        everybody else with the potentially broken code"
    
    Additionally, Will Deacon (maintainer of the arm64 port, one of the
    architectures previously benchmarked) noted:
    
        So, from the arm64 side of the fence, I'm perfectly happy just
        removing the cpu_relax() calls from lockref.
    
    As such, come full circle in history and whack it altogether.
    
    Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
    Link: https://lore.kernel.org/all/CAGudoHHx0Nqg6DE70zAVA75eV-HXfWyhVMWZ-aSeOofkA_=WdA@mail.gmail.com/
    
    
    Acked-by: Tony Luck <tony.luck@intel.com> # ia64
    Acked-by: Nicholas Piggin <npiggin@gmail.com> # powerpc
    Acked-by: Will Deacon <will@kernel.org> # arm64
    Acked-by: Peter Zijlstra <peterz@infradead.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
lockref.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/lockref.h>

#if USE_CMPXCHG_LOCKREF

/*
 * Note that the "cmpxchg()" reloads the "old" value for the
 * failure case.
 */
#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
	int retry = 100;							\
	struct lockref old;							\
	BUILD_BUG_ON(sizeof(old) != 8);						\
	old.lock_count = READ_ONCE(lockref->lock_count);			\
	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {  	\
		struct lockref new = old;					\
		CODE								\
		if (likely(try_cmpxchg64_relaxed(&lockref->lock_count,		\
						 &old.lock_count,		\
						 new.lock_count))) {		\
			SUCCESS;						\
		}								\
		if (!--retry)							\
			break;							\
	}									\
} while (0)
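
/*
 * Usage sketch (illustrative only, not compiled code): with CODE being
 * "new.count++;" and SUCCESS being "return;", as in lockref_get() below,
 * the loop above roughly does the following:
 *
 *	snapshot lockref->lock_count into 'old';
 *	while the spinlock half of 'old' reads as unlocked:
 *		copy 'old' to 'new' and apply CODE to 'new';
 *		try_cmpxchg64_relaxed() the whole 8-byte lockref;
 *		on success run SUCCESS (the lockless update is done);
 *		on failure 'old' now holds the fresh value, so retry at once;
 *		after 100 failed attempts give up and let the caller
 *		fall back to the spinlock.
 */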

#else

#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)

#endif

/**
 * lockref_get - Increments reference count unconditionally
 * @lockref: pointer to lockref structure
 *
 * This operation is only valid if you already hold a reference
 * to the object, so you know the count cannot be zero.
 */
void lockref_get(struct lockref *lockref)
{
	CMPXCHG_LOOP(
		new.count++;
	,
		return;
	);

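	/* The lockless fast path did not succeed; fall back to the spinlock. */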
	spin_lock(&lockref->lock);
	lockref->count++;
	spin_unlock(&lockref->lock);
}
EXPORT_SYMBOL(lockref_get);

/**
 * lockref_get_not_zero - Increments count unless the count is 0 or dead
 * @lockref: pointer to lockref structure
 * Return: 1 if count updated successfully or 0 if count was zero
 */
int lockref_get_not_zero(struct lockref *lockref)
{
	int retval;

	CMPXCHG_LOOP(
		new.count++;
		if (old.count <= 0)
			return 0;
	,
		return 1;
	);

	spin_lock(&lockref->lock);
	retval = 0;
	if (lockref->count > 0) {
		lockref->count++;
		retval = 1;
	}
	spin_unlock(&lockref->lock);
	return retval;
}
EXPORT_SYMBOL(lockref_get_not_zero);

/**
 * lockref_put_not_zero - Decrements count unless count <= 1 before decrement
 * @lockref: pointer to lockref structure
 * Return: 1 if count updated successfully or 0 if count would become zero
 */
int lockref_put_not_zero(struct lockref *lockref)
{
	int retval;

	CMPXCHG_LOOP(
		new.count--;
		if (old.count <= 1)
			return 0;
	,
		return 1;
	);

	spin_lock(&lockref->lock);
	retval = 0;
	if (lockref->count > 1) {
		lockref->count--;
		retval = 1;
	}
	spin_unlock(&lockref->lock);
	return retval;
}
EXPORT_SYMBOL(lockref_put_not_zero);

/**
 * lockref_put_return - Decrement reference count if possible
 * @lockref: pointer to lockref structure
 *
 * Decrement the reference count and return the new value.
 * If the lockref was dead or locked, return an error.
 */
int lockref_put_return(struct lockref *lockref)
{
	CMPXCHG_LOOP(
		new.count--;
		if (old.count <= 0)
			return -1;
	,
		return new.count;
	);
	return -1;
}
EXPORT_SYMBOL(lockref_put_return);

/**
 * lockref_put_or_lock - decrements count unless count <= 1 before decrement
 * @lockref: pointer to lockref structure
 * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
 */
int lockref_put_or_lock(struct lockref *lockref)
{
	CMPXCHG_LOOP(
		new.count--;
		if (old.count <= 1)
			break;
	,
		return 1;
	);

	spin_lock(&lockref->lock);
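	/* Count too low to drop: return 0 with the lock still held. */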
	if (lockref->count <= 1)
		return 0;
	lockref->count--;
	spin_unlock(&lockref->lock);
	return 1;
}
EXPORT_SYMBOL(lockref_put_or_lock);

/**
 * lockref_mark_dead - mark lockref dead
 * @lockref: pointer to lockref structure
 */
void lockref_mark_dead(struct lockref *lockref)
{
	assert_spin_locked(&lockref->lock);
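	/* A negative count means dead; see lockref_get_not_dead(). */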
	lockref->count = -128;
}
EXPORT_SYMBOL(lockref_mark_dead);

/**
 * lockref_get_not_dead - Increments count unless the ref is dead
 * @lockref: pointer to lockref structure
 * Return: 1 if count updated successfully or 0 if lockref was dead
 */
int lockref_get_not_dead(struct lockref *lockref)
{
	int retval;

	CMPXCHG_LOOP(
		new.count++;
		if (old.count < 0)
			return 0;
	,
		return 1;
	);

	spin_lock(&lockref->lock);
	retval = 0;
	if (lockref->count >= 0) {
		lockref->count++;
		retval = 1;
	}
	spin_unlock(&lockref->lock);
	return retval;
}
EXPORT_SYMBOL(lockref_get_not_dead);
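
For reference, a minimal sketch of how a caller typically pairs these helpers
(the "struct foo", foo_tryget() and foo_put() names are hypothetical and not
part of this file; it assumes the object is freed once its lockref is marked
dead):

#include <linux/lockref.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct foo {
	struct lockref ref;
	/* ... payload ... */
};

/* Take a reference unless the object is already dead. */
static bool foo_tryget(struct foo *f)
{
	return lockref_get_not_dead(&f->ref);
}

/* Drop a reference; tear the object down when the last one goes away. */
static void foo_put(struct foo *f)
{
	if (lockref_put_or_lock(&f->ref))
		return;			/* fast path: count was > 1 */

	/* Count was <= 1 and f->ref.lock is now held. */
	lockref_mark_dead(&f->ref);
	spin_unlock(&f->ref.lock);
	kfree(f);
}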