From a3aaa86ea34cbbd83af2607da644dda640f1f99b Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 26 May 2017 09:57:09 +0200 Subject: [PATCH] mediaapi: Add initial skeleton --- .../cmd/dendrite-media-api-server/main.go | 83 ++++++++++ .../dendrite/mediaapi/config/config.go | 34 ++++ .../dendrite/mediaapi/routing/routing.go | 66 ++++++++ .../dendrite/mediaapi/types/types.go | 72 +++++++++ .../dendrite/mediaapi/writers/download.go | 96 +++++++++++ .../dendrite/mediaapi/writers/upload.go | 149 ++++++++++++++++++ 6 files changed, 500 insertions(+) create mode 100644 src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/config/config.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/types/types.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/writers/download.go create mode 100644 src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go new file mode 100644 index 000000000..bfc1ee0ec --- /dev/null +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -0,0 +1,83 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "net/http" + "os" + "path/filepath" + "strconv" + + "github.com/matrix-org/dendrite/common" + "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/routing" + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" + + log "github.com/Sirupsen/logrus" +) + +var ( + bindAddr = os.Getenv("BIND_ADDRESS") + dataSource = os.Getenv("DATABASE") + logDir = os.Getenv("LOG_DIR") + serverName = os.Getenv("SERVER_NAME") + basePath = os.Getenv("BASE_PATH") + // Note: if the MAX_FILE_SIZE_BYTES is set to 0, it will be unlimited + maxFileSizeBytesString = os.Getenv("MAX_FILE_SIZE_BYTES") +) + +func main() { + common.SetupLogging(logDir) + + if bindAddr == "" { + log.Panic("No BIND_ADDRESS environment variable found.") + } + if basePath == "" { + log.Panic("No BASE_PATH environment variable found.") + } + absBasePath, err := filepath.Abs(basePath) + if err != nil { + log.WithError(err).WithField("BASE_PATH", basePath).Panic("BASE_PATH is invalid (must be able to make absolute)") + } + + if serverName == "" { + serverName = "localhost" + } + maxFileSizeBytes, err := strconv.ParseInt(maxFileSizeBytesString, 10, 64) + if err != nil { + maxFileSizeBytes = 10 * 1024 * 1024 + log.WithError(err).WithField("MAX_FILE_SIZE_BYTES", maxFileSizeBytesString).Warnf("Failed to parse MAX_FILE_SIZE_BYTES. 
Defaulting to %v bytes.", maxFileSizeBytes) + } + + cfg := &config.MediaAPI{ + ServerName: gomatrixserverlib.ServerName(serverName), + AbsBasePath: types.Path(absBasePath), + MaxFileSizeBytes: types.FileSizeBytes(maxFileSizeBytes), + DataSource: dataSource, + } + + log.WithFields(log.Fields{ + "BASE_PATH": absBasePath, + "BIND_ADDRESS": bindAddr, + "DATABASE": dataSource, + "LOG_DIR": logDir, + "MAX_FILE_SIZE_BYTES": maxFileSizeBytes, + "SERVER_NAME": serverName, + }).Info("Starting mediaapi") + + routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg) + log.Fatal(http.ListenAndServe(bindAddr, nil)) +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go new file mode 100644 index 000000000..a2d8f43c6 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -0,0 +1,34 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" +) + +// MediaAPI contains the config information necessary to spin up a mediaapi process. +type MediaAPI struct { + // The name of the server. This is usually the domain name, e.g 'matrix.org', 'localhost'. + ServerName gomatrixserverlib.ServerName `yaml:"server_name"` + // The absolute base path to where media files will be stored. 
+ AbsBasePath types.Path `yaml:"abs_base_path"` + // The maximum file size in bytes that is allowed to be stored on this server. + // Note that remote files larger than this can still be proxied to a client, they will just not be cached. + // Note: if MaxFileSizeBytes is set to 0, the size is unlimited. + MaxFileSizeBytes types.FileSizeBytes `yaml:"max_file_size_bytes"` + // The postgres connection config for connecting to the database e.g a postgres:// URI + DataSource string `yaml:"database"` +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go new file mode 100644 index 000000000..757870c56 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -0,0 +1,66 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package routing + +import ( + "net/http" + "sync" + + "github.com/gorilla/mux" + "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/dendrite/mediaapi/writers" + "github.com/matrix-org/gomatrixserverlib" + "github.com/matrix-org/util" + "github.com/prometheus/client_golang/prometheus" +) + +const pathPrefixR0 = "/_matrix/media/v1" + +// Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client +// to clients which need to make outbound HTTP requests. 
+func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI) { + apiMux := mux.NewRouter() + r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() + r0mux.Handle("/upload", makeAPI("upload", func(req *http.Request) util.JSONResponse { + return writers.Upload(req, cfg) + })) + + activeRemoteRequests := &types.ActiveRemoteRequests{ + Set: map[string]*sync.Cond{}, + } + r0mux.Handle("/download/{serverName}/{mediaId}", + prometheus.InstrumentHandler("download", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + req = util.RequestWithLogging(req) + + // Set common headers returned regardless of the outcome of the request + util.SetCORSHeaders(w) + // Content-Type will be overridden in case of returning file data, else we respond with JSON-formatted errors + w.Header().Set("Content-Type", "application/json") + + vars := mux.Vars(req) + writers.Download(w, req, gomatrixserverlib.ServerName(vars["serverName"]), types.MediaID(vars["mediaId"]), cfg, activeRemoteRequests) + })), + ) + + servMux.Handle("/metrics", prometheus.Handler()) + servMux.Handle("/api/", http.StripPrefix("/api", apiMux)) +} + +// make a util.JSONRequestHandler function into an http.Handler. +func makeAPI(metricsName string, f func(*http.Request) util.JSONResponse) http.Handler { + h := util.NewJSONRequestHandler(f) + return prometheus.InstrumentHandler(metricsName, util.MakeJSONAPI(h)) +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go new file mode 100644 index 000000000..cef390cf0 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go @@ -0,0 +1,72 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import ( + "sync" + + "github.com/matrix-org/gomatrixserverlib" +) + +// ContentDisposition is an HTTP Content-Disposition header string +type ContentDisposition string + +// FileSizeBytes is a file size in bytes +type FileSizeBytes int64 + +// ContentType is an HTTP Content-Type header string representing the MIME type of a request body +type ContentType string + +// Filename is a string representing the name of a file +type Filename string + +// Base64Hash is a base64 URLEncoding string representation of a SHA-256 hash sum +type Base64Hash string + +// Path is an absolute or relative UNIX filesystem path +type Path string + +// MediaID is a string representing the unique identifier for a file (could be a hash but does not have to be) +type MediaID string + +// RequestMethod is an HTTP request method i.e. GET, POST, etc +type RequestMethod string + +// MatrixUserID is a Matrix user ID string in the form @user:domain e.g. 
@alice:matrix.org +type MatrixUserID string + +// UnixMs is the milliseconds since the Unix epoch +type UnixMs int64 + +// MediaMetadata is metadata associated with a media file +type MediaMetadata struct { + MediaID MediaID + Origin gomatrixserverlib.ServerName + ContentType ContentType + ContentDisposition ContentDisposition + FileSizeBytes FileSizeBytes + CreationTimestamp UnixMs + UploadName Filename + Base64Hash Base64Hash + UserID MatrixUserID +} + +// ActiveRemoteRequests is a lockable map of media URIs requested from remote homeservers +// It is used for ensuring multiple requests for the same file do not clobber each other. +type ActiveRemoteRequests struct { + sync.Mutex + // The string key is an mxc:// URL + Set map[string]*sync.Cond +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go new file mode 100644 index 000000000..82053f149 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/download.go @@ -0,0 +1,96 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package writers + +import ( + "encoding/json" + "net/http" + + log "github.com/Sirupsen/logrus" + "github.com/matrix-org/dendrite/clientapi/jsonerror" + "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" + "github.com/matrix-org/util" +) + +// downloadRequest metadata included in or derivable from a download request +// https://matrix.org/docs/spec/client_server/r0.2.0.html#get-matrix-media-r0-download-servername-mediaid +type downloadRequest struct { + MediaMetadata *types.MediaMetadata + Logger *log.Entry +} + +// Download implements /download +func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib.ServerName, mediaID types.MediaID, cfg *config.MediaAPI, activeRemoteRequests *types.ActiveRemoteRequests) { + r := &downloadRequest{ + MediaMetadata: &types.MediaMetadata{ + MediaID: mediaID, + Origin: origin, + }, + Logger: util.GetLogger(req.Context()), + } + + // request validation + if req.Method != "GET" { + r.jsonErrorResponse(w, util.JSONResponse{ + Code: 405, + JSON: jsonerror.Unknown("request method must be GET"), + }) + return + } + + if resErr := r.Validate(); resErr != nil { + r.jsonErrorResponse(w, *resErr) + return + } + + // doDownload +} + +func (r *downloadRequest) jsonErrorResponse(w http.ResponseWriter, res util.JSONResponse) { + // Marshal JSON response into raw bytes to send as the HTTP body + resBytes, err := json.Marshal(res.JSON) + if err != nil { + r.Logger.WithError(err).Error("Failed to marshal JSONResponse") + // this should never fail to be marshalled so drop err to the floor + res = util.MessageResponse(500, "Internal Server Error") + resBytes, _ = json.Marshal(res.JSON) + } + + // Set status code and write the body + w.WriteHeader(res.Code) + r.Logger.WithField("code", res.Code).Infof("Responding (%d bytes)", len(resBytes)) + w.Write(resBytes) +} + +// Validate validates the downloadRequest fields +func (r 
*downloadRequest) Validate() *util.JSONResponse { + // FIXME: the following errors aren't bad JSON, rather just a bad request path + // maybe give the URL pattern in the routing, these are not even possible as the handler would not be hit...? + if r.MediaMetadata.MediaID == "" { + return &util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound("mediaId must be a non-empty string"), + } + } + if r.MediaMetadata.Origin == "" { + return &util.JSONResponse{ + Code: 404, + JSON: jsonerror.NotFound("serverName must be a non-empty string"), + } + } + return nil +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go new file mode 100644 index 000000000..017a00783 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -0,0 +1,149 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package writers + +import ( + "fmt" + "net/http" + "net/url" + "strings" + + log "github.com/Sirupsen/logrus" + "github.com/matrix-org/dendrite/clientapi/jsonerror" + "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/util" +) + +// uploadRequest metadata included in or derivable from an upload request +// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload +// NOTE: The members come from HTTP request metadata such as headers, query parameters or can be derived from such +type uploadRequest struct { + MediaMetadata *types.MediaMetadata + Logger *log.Entry +} + +// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload +type uploadResponse struct { + ContentURI string `json:"content_uri"` +} + +// Upload implements /upload +// +// This endpoint involves uploading potentially significant amounts of data to the homeserver. +// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. +// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. +// TODO: We should time out requests if they have not received any data within a configured timeout period. +func Upload(req *http.Request, cfg *config.MediaAPI) util.JSONResponse { + r, resErr := parseAndValidateRequest(req, cfg) + if resErr != nil { + return *resErr + } + + // doUpload + + return util.JSONResponse{ + Code: 200, + JSON: uploadResponse{ + ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID), + }, + } +} + +// parseAndValidateRequest parses the incoming upload request to validate and extract +// all the metadata about the media being uploaded. 
Returns either an uploadRequest or +// an error formatted as a util.JSONResponse +func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRequest, *util.JSONResponse) { + if req.Method != "POST" { + return nil, &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown("HTTP request method must be POST."), + } + } + + // authenticate user + + r := &uploadRequest{ + MediaMetadata: &types.MediaMetadata{ + Origin: cfg.ServerName, + ContentDisposition: types.ContentDisposition(req.Header.Get("Content-Disposition")), + FileSizeBytes: types.FileSizeBytes(req.ContentLength), + ContentType: types.ContentType(req.Header.Get("Content-Type")), + UploadName: types.Filename(req.FormValue("filename")), + }, + Logger: util.GetLogger(req.Context()), + } + + if resErr := r.Validate(cfg.MaxFileSizeBytes); resErr != nil { + return nil, resErr + } + + // FIXME: do we want to always override ContentDisposition here or only if + // there is no Content-Disposition header set? + if len(r.MediaMetadata.UploadName) > 0 { + r.MediaMetadata.ContentDisposition = types.ContentDisposition( + "inline; filename*=utf-8''" + url.PathEscape(string(r.MediaMetadata.UploadName)), + ) + } + + return r, nil +} + +// Validate validates the uploadRequest fields +func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse { + // TODO: Any validation to be done on ContentDisposition? + + if r.MediaMetadata.FileSizeBytes < 1 { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."), + } + } + if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > maxFileSizeBytes { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)), + } + } + // TODO: Check if the Content-Type is a valid type? 
+ if r.MediaMetadata.ContentType == "" { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown("HTTP Content-Type request header must be set."), + } + } + // TODO: Validate filename - what are the valid characters? + if r.MediaMetadata.UserID != "" { + // TODO: We should put user ID parsing code into gomatrixserverlib and use that instead + // (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 ) + // It should be a struct (with pointers into a single string to avoid copying) and + // we should update all refs to use UserID types rather than strings. + // https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92 + if len(r.MediaMetadata.UserID) == 0 || r.MediaMetadata.UserID[0] != '@' { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown("user id must start with '@'"), + } + } + parts := strings.SplitN(string(r.MediaMetadata.UserID[1:]), ":", 2) + if len(parts) != 2 { + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.BadJSON("user id must be in the form @localpart:domain"), + } + } + } + return nil +}