mirror of
https://github.com/element-hq/synapse.git
synced 2024-12-26 17:36:27 +00:00
Improve lock performance when a lot of locks are waiting (#16840)
When a lot of locks are waiting for a single lock, notifying all locks independently with `call_later` on each release is really costly and incurs some kind of async contention, where the CPU is spinning a lot for not much. The included test is taking around 30s before the change, and 0.5s after. It was found following failing tests with https://github.com/element-hq/synapse/pull/16827.
This commit is contained in:
parent
a111ba0207
commit
cb562d73aa
1
changelog.d/16840.misc
Normal file
1
changelog.d/16840.misc
Normal file
|
@ -0,0 +1 @@
|
|||
Improve lock performance when a lot of locks are all waiting for a single lock to be released.
|
|
@ -182,12 +182,15 @@ class WorkerLocksHandler:
|
|||
if not locks:
|
||||
return
|
||||
|
||||
def _wake_deferred(deferred: defer.Deferred) -> None:
|
||||
if not deferred.called:
|
||||
deferred.callback(None)
|
||||
def _wake_all_locks(
|
||||
locks: Collection[Union[WaitingLock, WaitingMultiLock]]
|
||||
) -> None:
|
||||
for lock in locks:
|
||||
deferred = lock.deferred
|
||||
if not deferred.called:
|
||||
deferred.callback(None)
|
||||
|
||||
for lock in locks:
|
||||
self._clock.call_later(0, _wake_deferred, lock.deferred)
|
||||
self._clock.call_later(0, _wake_all_locks, locks)
|
||||
|
||||
@wrap_as_background_process("_cleanup_locks")
|
||||
async def _cleanup_locks(self) -> None:
|
||||
|
|
|
@ -27,6 +27,7 @@ from synapse.util import Clock
|
|||
|
||||
from tests import unittest
|
||||
from tests.replication._base import BaseMultiWorkerStreamTestCase
|
||||
from tests.utils import test_timeout
|
||||
|
||||
|
||||
class WorkerLockTestCase(unittest.HomeserverTestCase):
|
||||
|
@ -50,6 +51,28 @@ class WorkerLockTestCase(unittest.HomeserverTestCase):
|
|||
self.get_success(d2)
|
||||
self.get_success(lock2.__aexit__(None, None, None))
|
||||
|
||||
def test_lock_contention(self) -> None:
|
||||
"""Test lock contention when a lot of locks wait on a single worker"""
|
||||
|
||||
# It takes around 0.5s on a 5+ years old laptop
|
||||
with test_timeout(5):
|
||||
nb_locks = 500
|
||||
d = self._take_locks(nb_locks)
|
||||
self.assertEqual(self.get_success(d), nb_locks)
|
||||
|
||||
async def _take_locks(self, nb_locks: int) -> int:
|
||||
locks = [
|
||||
self.hs.get_worker_locks_handler().acquire_lock("test_lock", "")
|
||||
for _ in range(nb_locks)
|
||||
]
|
||||
|
||||
nb_locks_taken = 0
|
||||
for lock in locks:
|
||||
async with lock:
|
||||
nb_locks_taken += 1
|
||||
|
||||
return nb_locks_taken
|
||||
|
||||
|
||||
class WorkerLockWorkersTestCase(BaseMultiWorkerStreamTestCase):
|
||||
def prepare(
|
||||
|
|
|
@ -21,7 +21,20 @@
|
|||
|
||||
import atexit
|
||||
import os
|
||||
from typing import Any, Callable, Dict, List, Tuple, Type, TypeVar, Union, overload
|
||||
import signal
|
||||
from types import FrameType, TracebackType
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
TypeVar,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
import attr
|
||||
from typing_extensions import Literal, ParamSpec
|
||||
|
@ -379,3 +392,30 @@ def checked_cast(type: Type[T], x: object) -> T:
|
|||
"""
|
||||
assert isinstance(x, type)
|
||||
return x
|
||||
|
||||
|
||||
class TestTimeout(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class test_timeout:
|
||||
def __init__(self, seconds: int, error_message: Optional[str] = None) -> None:
|
||||
if error_message is None:
|
||||
error_message = "test timed out after {}s.".format(seconds)
|
||||
self.seconds = seconds
|
||||
self.error_message = error_message
|
||||
|
||||
def handle_timeout(self, signum: int, frame: Optional[FrameType]) -> None:
|
||||
raise TestTimeout(self.error_message)
|
||||
|
||||
def __enter__(self) -> None:
|
||||
signal.signal(signal.SIGALRM, self.handle_timeout)
|
||||
signal.alarm(self.seconds)
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: Optional[Type[BaseException]],
|
||||
exc_val: Optional[BaseException],
|
||||
exc_tb: Optional[TracebackType],
|
||||
) -> None:
|
||||
signal.alarm(0)
|
||||
|
|
Loading…
Reference in a new issue