diff --git a/roomserver/internal/input/input.go b/roomserver/internal/input/input.go index d76e354ba..d6ad77fe6 100644 --- a/roomserver/internal/input/input.go +++ b/roomserver/internal/input/input.go @@ -102,7 +102,7 @@ func (r *Inputer) Start() error { _ = msg.InProgress() // resets the acknowledgement wait timer defer eventsInProgress.Delete(index) defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": roomID}).Dec() - retry, err := r.processRoomEventUsingUpdater(context.Background(), roomID, &inputRoomEvent) + action, err := r.processRoomEventUsingUpdater(context.Background(), roomID, &inputRoomEvent) if err != nil { if !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) { sentry.CaptureException(err) @@ -113,9 +113,10 @@ func (r *Inputer) Start() error { "type": inputRoomEvent.Event.Type(), }).Warn("Roomserver failed to process async event") } - if retry { + switch action { + case retryLater: _ = msg.Nak() - } else { + case doNotRetry: _ = msg.Ack() } }) @@ -137,35 +138,41 @@ func (r *Inputer) Start() error { return err } +type retryAction int + +const ( + doNotRetry retryAction = iota + retryLater +) + // processRoomEventUsingUpdater opens up a room updater and tries to -// process the event. It returns two values: the bool signifying whether -// we should retry later if possible (i.e. using NATS, because we couldn't -// commit the transaction) and an error signifying anything else that may -// have gone wrong. +// process the event. It returns whether or not we should positively +// or negatively acknowledge the event (i.e. for NATS) and an error +// if it occurred. func (r *Inputer) processRoomEventUsingUpdater( ctx context.Context, roomID string, inputRoomEvent *api.InputRoomEvent, -) (bool, error) { +) (retryAction, error) { roomInfo, err := r.DB.RoomInfo(ctx, roomID) if err != nil { - return false, fmt.Errorf("r.DB.RoomInfo: %w", err) + return doNotRetry, fmt.Errorf("r.DB.RoomInfo: %w", err) } updater, err := r.DB.GetRoomUpdater(ctx, roomInfo) if err != nil { - return true, fmt.Errorf("r.DB.GetRoomUpdater: %w", err) + return retryLater, fmt.Errorf("r.DB.GetRoomUpdater: %w", err) } commit, err := r.processRoomEvent(ctx, updater, inputRoomEvent) if commit { if cerr := updater.Commit(); cerr != nil { - return true, fmt.Errorf("updater.Commit: %w", cerr) + return retryLater, fmt.Errorf("updater.Commit: %w", cerr) } } else { if rerr := updater.Rollback(); rerr != nil { - return true, fmt.Errorf("updater.Rollback: %w", rerr) + return retryLater, fmt.Errorf("updater.Rollback: %w", rerr) } } - return false, err + return doNotRetry, err } // InputRoomEvents implements api.RoomserverInternalAPI diff --git a/roomserver/storage/interface.go b/roomserver/storage/interface.go index a9851e05b..90365eefd 100644 --- a/roomserver/storage/interface.go +++ b/roomserver/storage/interface.go @@ -24,8 +24,8 @@ import ( ) type Database interface { - // Do we support processing input events for more than one room at a time? - SupportsConcurrentRoomInputs() bool + // Do we support transactional isolation on this database engine? + SupportsTransactionalIsolation() bool // RoomInfo returns room information for the given room ID, or nil if there is no room. RoomInfo(ctx context.Context, roomID string) (*types.RoomInfo, error) // Store the room state at an event in the database diff --git a/roomserver/storage/shared/room_updater.go b/roomserver/storage/shared/room_updater.go index bb9f5dc62..ec44323f5 100644 --- a/roomserver/storage/shared/room_updater.go +++ b/roomserver/storage/shared/room_updater.go @@ -25,7 +25,16 @@ func rollback(txn *sql.Tx) { txn.Rollback() // nolint: errcheck } -func NewRoomUpdater(ctx context.Context, d *Database, txn *sql.Tx, roomInfo *types.RoomInfo) (*RoomUpdater, error) { +func NewRoomUpdater(ctx context.Context, d *Database, roomInfo *types.RoomInfo) (*RoomUpdater, error) { + var txn *sql.Tx + if d.SupportsTransactionalIsolation() { + var err error + txn, err = d.DB.Begin() + if err != nil { + return nil, err + } + } + // If the roomInfo is nil then that means that the room doesn't exist // yet, so we can't do `SelectLatestEventsNIDsForUpdate` because that // would involve locking a row on the table that doesn't exist. Instead diff --git a/roomserver/storage/shared/storage.go b/roomserver/storage/shared/storage.go index 2df88534d..e6962e170 100644 --- a/roomserver/storage/shared/storage.go +++ b/roomserver/storage/shared/storage.go @@ -45,7 +45,7 @@ type Database struct { GetRoomUpdaterFn func(ctx context.Context, roomInfo *types.RoomInfo) (*RoomUpdater, error) } -func (d *Database) SupportsConcurrentRoomInputs() bool { +func (d *Database) SupportsTransactionalIsolation() bool { return true } @@ -447,7 +447,7 @@ func (d *Database) events( if err != nil { return nil, err } - eventIDs, _ := d.EventsTable.BulkSelectEventID(ctx, nil, eventNIDs) + eventIDs, _ := d.EventsTable.BulkSelectEventID(ctx, txn, eventNIDs) if err != nil { eventIDs = map[types.EventNID]string{} } @@ -501,16 +501,15 @@ func (d *Database) MembershipUpdater( ctx context.Context, roomID, targetUserID string, targetLocal bool, roomVersion gomatrixserverlib.RoomVersion, ) (*MembershipUpdater, error) { - txn, err := d.DB.Begin() - if err != nil { - return nil, err + var txn *sql.Tx + var err error + if d.SupportsTransactionalIsolation() { + txn, err = d.DB.Begin() + if err != nil { + return nil, err + } } - var updater *MembershipUpdater - _ = d.Writer.Do(d.DB, txn, func(txn *sql.Tx) error { - updater, err = NewMembershipUpdater(ctx, d, txn, roomID, targetUserID, targetLocal, roomVersion) - return err - }) - return updater, err + return NewMembershipUpdater(ctx, d, txn, roomID, targetUserID, targetLocal, roomVersion) } func (d *Database) GetRoomUpdater( @@ -519,16 +518,7 @@ func (d *Database) GetRoomUpdater( if d.GetRoomUpdaterFn != nil { return d.GetRoomUpdaterFn(ctx, roomInfo) } - txn, err := d.DB.Begin() - if err != nil { - return nil, err - } - var updater *RoomUpdater - _ = d.Writer.Do(d.DB, txn, func(txn *sql.Tx) error { - updater, err = NewRoomUpdater(ctx, d, txn, roomInfo) - return err - }) - return updater, err + return NewRoomUpdater(ctx, d, roomInfo) } func (d *Database) StoreEvent( diff --git a/roomserver/storage/sqlite3/storage.go b/roomserver/storage/sqlite3/storage.go index 325c253b5..3e6b9ce00 100644 --- a/roomserver/storage/sqlite3/storage.go +++ b/roomserver/storage/sqlite3/storage.go @@ -23,7 +23,6 @@ import ( "github.com/matrix-org/dendrite/internal/sqlutil" "github.com/matrix-org/dendrite/roomserver/storage/shared" "github.com/matrix-org/dendrite/roomserver/storage/sqlite3/deltas" - "github.com/matrix-org/dendrite/roomserver/types" "github.com/matrix-org/dendrite/setup/config" "github.com/matrix-org/gomatrixserverlib" ) @@ -193,7 +192,7 @@ func (d *Database) prepare(db *sql.DB, cache caching.RoomServerCaches) error { return nil } -func (d *Database) SupportsConcurrentRoomInputs() bool { +func (d *Database) SupportsTransactionalIsolation() bool { // This isn't supported in SQLite mode yet because of issues with // database locks. // TODO: Look at this again - the problem is probably to do with @@ -201,18 +200,6 @@ func (d *Database) SupportsConcurrentRoomInputs() bool { return false } -func (d *Database) GetRoomUpdater( - ctx context.Context, roomInfo *types.RoomInfo, -) (*shared.RoomUpdater, error) { - // TODO: Do not use transactions. We should be holding open this transaction but we cannot have - // multiple write transactions on sqlite. The code will perform additional - // write transactions independent of this one which will consistently cause - // 'database is locked' errors. As sqlite doesn't support multi-process on the - // same DB anyway, and we only execute updates sequentially, the only worries - // are for rolling back when things go wrong. (atomicity) - return shared.NewRoomUpdater(ctx, &d.Database, nil, roomInfo) -} - func (d *Database) MembershipUpdater( ctx context.Context, roomID, targetUserID string, targetLocal bool, roomVersion gomatrixserverlib.RoomVersion,