synapse/tests/utils.py
Mathieu Velten cb562d73aa
Improve lock performance when a lot of locks are waiting (#16840)
When a lot of locks are waiting for a single lock, notifying all locks
independently with `call_later` on each release is really costly and
incurs some kind of async contention, where the CPU is spinning a lot
for not much.

The included test is taking around 30s before the change, and 0.5s
after.

It was found following failing tests with
https://github.com/element-hq/synapse/pull/16827.
2024-03-14 13:49:54 +00:00

422 lines
14 KiB
Python

#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2014-2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import atexit
import os
import signal
from types import FrameType, TracebackType
from typing import (
Any,
Callable,
Dict,
List,
Optional,
Tuple,
Type,
TypeVar,
Union,
overload,
)
import attr
from typing_extensions import Literal, ParamSpec
from synapse.api.constants import EventTypes
from synapse.api.room_versions import RoomVersions
from synapse.config.homeserver import HomeServerConfig
from synapse.config.server import DEFAULT_ROOM_VERSION
from synapse.logging.context import current_context, set_current_context
from synapse.server import HomeServer
from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.engines import create_engine
from synapse.storage.prepare_database import prepare_database
try:
import authlib # noqa: F401
HAS_AUTHLIB = True
except ImportError:
HAS_AUTHLIB = False
# set this to True to run the tests against postgres instead of sqlite.
#
# When running under postgres, we first create a base database with the name
# POSTGRES_BASE_DB and update it to the current schema. Then, for each test case, we
# create another unique database, using the base database as a template.
USE_POSTGRES_FOR_TESTS = os.environ.get("SYNAPSE_POSTGRES", False)
LEAVE_DB = os.environ.get("SYNAPSE_LEAVE_DB", False)
POSTGRES_USER = os.environ.get("SYNAPSE_POSTGRES_USER", None)
POSTGRES_HOST = os.environ.get("SYNAPSE_POSTGRES_HOST", None)
POSTGRES_PASSWORD = os.environ.get("SYNAPSE_POSTGRES_PASSWORD", None)
POSTGRES_PORT = (
int(os.environ["SYNAPSE_POSTGRES_PORT"])
if "SYNAPSE_POSTGRES_PORT" in os.environ
else None
)
POSTGRES_BASE_DB = "_synapse_unit_tests_base_%s" % (os.getpid(),)
# When debugging a specific test, it's occasionally useful to write the
# DB to disk and query it with the sqlite CLI.
SQLITE_PERSIST_DB = os.environ.get("SYNAPSE_TEST_PERSIST_SQLITE_DB") is not None
# the dbname we will connect to in order to create the base database.
POSTGRES_DBNAME_FOR_INITIAL_CREATE = "postgres"
def setupdb() -> None:
# If we're using PostgreSQL, set up the db once
if USE_POSTGRES_FOR_TESTS:
# create a PostgresEngine
db_engine = create_engine({"name": "psycopg2", "args": {}})
# connect to postgres to create the base database.
db_conn = db_engine.module.connect(
user=POSTGRES_USER,
host=POSTGRES_HOST,
port=POSTGRES_PORT,
password=POSTGRES_PASSWORD,
dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE,
)
db_engine.attempt_to_set_autocommit(db_conn, autocommit=True)
cur = db_conn.cursor()
cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,))
cur.execute(
"CREATE DATABASE %s ENCODING 'UTF8' LC_COLLATE='C' LC_CTYPE='C' "
"template=template0;" % (POSTGRES_BASE_DB,)
)
cur.close()
db_conn.close()
# Set up in the db
db_conn = db_engine.module.connect(
dbname=POSTGRES_BASE_DB,
user=POSTGRES_USER,
host=POSTGRES_HOST,
port=POSTGRES_PORT,
password=POSTGRES_PASSWORD,
)
logging_conn = LoggingDatabaseConnection(db_conn, db_engine, "tests")
prepare_database(logging_conn, db_engine, None)
logging_conn.close()
def _cleanup() -> None:
db_conn = db_engine.module.connect(
user=POSTGRES_USER,
host=POSTGRES_HOST,
port=POSTGRES_PORT,
password=POSTGRES_PASSWORD,
dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE,
)
db_engine.attempt_to_set_autocommit(db_conn, autocommit=True)
cur = db_conn.cursor()
cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,))
cur.close()
db_conn.close()
atexit.register(_cleanup)
@overload
def default_config(name: str, parse: Literal[False] = ...) -> Dict[str, object]: ...
@overload
def default_config(name: str, parse: Literal[True]) -> HomeServerConfig: ...
def default_config(
name: str, parse: bool = False
) -> Union[Dict[str, object], HomeServerConfig]:
"""
Create a reasonable test config.
"""
config_dict = {
"server_name": name,
# Setting this to an empty list turns off federation sending.
"federation_sender_instances": [],
"media_store_path": "media",
# the test signing key is just an arbitrary ed25519 key to keep the config
# parser happy
"signing_key": "ed25519 a_lPym qvioDNmfExFBRPgdTU+wtFYKq4JfwFRv7sYVgWvmgJg",
# Disable trusted key servers, otherwise unit tests might try to actually
# reach out to matrix.org.
"trusted_key_servers": [],
"event_cache_size": 1,
"enable_registration": True,
"enable_registration_captcha": False,
"macaroon_secret_key": "not even a little secret",
"password_providers": [],
"worker_app": None,
"block_non_admin_invites": False,
"federation_domain_whitelist": None,
"filter_timeline_limit": 5000,
"user_directory_search_all_users": False,
"user_consent_server_notice_content": None,
"block_events_without_consent_error": None,
"user_consent_at_registration": False,
"user_consent_policy_name": "Privacy Policy",
"media_storage_providers": [],
"autocreate_auto_join_rooms": True,
"auto_join_rooms": [],
"limit_usage_by_mau": False,
"hs_disabled": False,
"hs_disabled_message": "",
"max_mau_value": 50,
"mau_trial_days": 0,
"mau_stats_only": False,
"mau_limits_reserved_threepids": [],
"admin_contact": None,
"rc_message": {"per_second": 10000, "burst_count": 10000},
"rc_registration": {"per_second": 10000, "burst_count": 10000},
"rc_login": {
"address": {"per_second": 10000, "burst_count": 10000},
"account": {"per_second": 10000, "burst_count": 10000},
"failed_attempts": {"per_second": 10000, "burst_count": 10000},
},
"rc_joins": {
"local": {"per_second": 10000, "burst_count": 10000},
"remote": {"per_second": 10000, "burst_count": 10000},
},
"rc_joins_per_room": {"per_second": 10000, "burst_count": 10000},
"rc_invites": {
"per_room": {"per_second": 10000, "burst_count": 10000},
"per_user": {"per_second": 10000, "burst_count": 10000},
},
"rc_3pid_validation": {"per_second": 10000, "burst_count": 10000},
"saml2_enabled": False,
"public_baseurl": None,
"default_identity_server": None,
"key_refresh_interval": 24 * 60 * 60 * 1000,
"old_signing_keys": {},
"tls_fingerprints": [],
"use_frozen_dicts": False,
# We need a sane default_room_version, otherwise attempts to create
# rooms will fail.
"default_room_version": DEFAULT_ROOM_VERSION,
# disable user directory updates, because they get done in the
# background, which upsets the test runner. Setting this to an
# (obviously) fake worker name disables updating the user directory.
"update_user_directory_from_worker": "does_not_exist_worker_name",
"caches": {"global_factor": 1, "sync_response_cache_duration": 0},
"listeners": [{"port": 0, "type": "http"}],
}
if parse:
config = HomeServerConfig()
config.parse_config_dict(config_dict, "", "")
return config
return config_dict
def mock_getRawHeaders(headers=None): # type: ignore[no-untyped-def]
headers = headers if headers is not None else {}
def getRawHeaders(name, default=None): # type: ignore[no-untyped-def]
# If the requested header is present, the real twisted function returns
# List[str] if name is a str and List[bytes] if name is a bytes.
# This mock doesn't support that behaviour.
# Fortunately, none of the current callers of mock_getRawHeaders() provide a
# headers dict, so we don't encounter this discrepancy in practice.
return headers.get(name, default)
return getRawHeaders
P = ParamSpec("P")
@attr.s(slots=True, auto_attribs=True)
class Timer:
absolute_time: float
callback: Callable[[], None]
expired: bool
# TODO: Make this generic over a ParamSpec?
@attr.s(slots=True, auto_attribs=True)
class Looper:
func: Callable[..., Any]
interval: float # seconds
last: float
args: Tuple[object, ...]
kwargs: Dict[str, object]
class MockClock:
now = 1000.0
def __init__(self) -> None:
# Timers in no particular order
self.timers: List[Timer] = []
self.loopers: List[Looper] = []
def time(self) -> float:
return self.now
def time_msec(self) -> int:
return int(self.time() * 1000)
def call_later(
self,
delay: float,
callback: Callable[P, object],
*args: P.args,
**kwargs: P.kwargs,
) -> Timer:
ctx = current_context()
def wrapped_callback() -> None:
set_current_context(ctx)
callback(*args, **kwargs)
t = Timer(self.now + delay, wrapped_callback, False)
self.timers.append(t)
return t
def looping_call(
self,
function: Callable[P, object],
interval: float,
*args: P.args,
**kwargs: P.kwargs,
) -> None:
self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs))
def cancel_call_later(self, timer: Timer, ignore_errs: bool = False) -> None:
if timer.expired:
if not ignore_errs:
raise Exception("Cannot cancel an expired timer")
timer.expired = True
self.timers = [t for t in self.timers if t != timer]
# For unit testing
def advance_time(self, secs: float) -> None:
self.now += secs
timers = self.timers
self.timers = []
for t in timers:
if t.expired:
raise Exception("Timer already expired")
if self.now >= t.absolute_time:
t.expired = True
t.callback()
else:
self.timers.append(t)
for looped in self.loopers:
if looped.last + looped.interval < self.now:
looped.func(*looped.args, **looped.kwargs)
looped.last = self.now
def advance_time_msec(self, ms: float) -> None:
self.advance_time(ms / 1000.0)
async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None:
"""Creates and persist a creation event for the given room"""
persistence_store = hs.get_storage_controllers().persistence
assert persistence_store is not None
store = hs.get_datastores().main
event_builder_factory = hs.get_event_builder_factory()
event_creation_handler = hs.get_event_creation_handler()
await store.store_room(
room_id=room_id,
room_creator_user_id=creator_id,
is_public=False,
room_version=RoomVersions.V1,
)
builder = event_builder_factory.for_room_version(
RoomVersions.V1,
{
"type": EventTypes.Create,
"state_key": "",
"sender": creator_id,
"room_id": room_id,
"content": {},
},
)
event, unpersisted_context = await event_creation_handler.create_new_client_event(
builder
)
context = await unpersisted_context.persist(event)
await persistence_store.persist_event(event, context)
T = TypeVar("T")
def checked_cast(type: Type[T], x: object) -> T:
"""A version of typing.cast that is checked at runtime.
We have our own function for this for two reasons:
1. typing.cast itself is deliberately a no-op at runtime, see
https://docs.python.org/3/library/typing.html#typing.cast
2. To help workaround a mypy-zope bug https://github.com/Shoobx/mypy-zope/issues/91
where mypy would erroneously consider `isinstance(x, type)` to be false in all
circumstances.
For this to make sense, `T` needs to be something that `isinstance` can check; see
https://docs.python.org/3/library/functions.html?highlight=isinstance#isinstance
https://docs.python.org/3/glossary.html#term-abstract-base-class
https://docs.python.org/3/library/typing.html#typing.runtime_checkable
for more details.
"""
assert isinstance(x, type)
return x
class TestTimeout(Exception):
pass
class test_timeout:
def __init__(self, seconds: int, error_message: Optional[str] = None) -> None:
if error_message is None:
error_message = "test timed out after {}s.".format(seconds)
self.seconds = seconds
self.error_message = error_message
def handle_timeout(self, signum: int, frame: Optional[FrameType]) -> None:
raise TestTimeout(self.error_message)
def __enter__(self) -> None:
signal.signal(signal.SIGALRM, self.handle_timeout)
signal.alarm(self.seconds)
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_val: Optional[BaseException],
exc_tb: Optional[TracebackType],
) -> None:
signal.alarm(0)