File size: 7,405 Bytes
6ee917b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import Foundation
import Vapor

/**
 Expose database as S3 endpoint. This can be used to pull data from one server to another. It is used only internally to transfer data between Open-Meteo API nodes. Note: This is only a limited implementation and not fully compatible.
 
 List example:
 `http://127.0.0.1:8080/?list-type=2&delimiter=/&prefix=data/cmc_gem_gdps/shortwave_radiation/&apikey=123`
 
 Download exmaple
 `http://127.0.0.1:8080/data/cmc_gem_gdps/shortwave_radiation/chunk_1430.om?apikey=123`
 
 TODO:
 - Actual S3 authentication with signatures instead of simple apikeys as URL query parameter. Not required at this stage.
 
 Nginx setting:
 ```
 location /data-internal {
   internal;
   alias /var/lib/openmeteo-api/data;
 }
 ```
 */
struct S3DataController: RouteCollection {
    static var syncApiKeys: [String.SubSequence] = Environment.get("API_SYNC_APIKEYS")?.split(separator: ",") ?? []
    static var nginxSendfilePrefix = Environment.get("NGINX_SENDFILE_PREFIX")
    
    func boot(routes: RoutesBuilder) throws {
        if Self.syncApiKeys.isEmpty {
            return
        }
        routes.get("", use: self.list)
        routes.get("data", "**", use: self.get)
    }
    
    /// https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
    struct S3ListV2: Codable {
        let list_type: Int
        let delimiter: String
        let prefix: String
        let apikey: String?
        
        enum CodingKeys: String, CodingKey {
            case list_type = "list-type"
            case delimiter
            case prefix
            case apikey
        }
    }
    
    struct S3ListV2File {
        let name: String
        let modificationTime: Date
        let fileSize: Int
    }
    
    struct DownloadParams: Codable {
        let apikey: String?
        /// in megabytes per second
        let rate: Int?
    }
    
    /// List all files in a specified directory
    func list(_ req: Request) async throws -> Response {
        let params = try req.query.decode(S3ListV2.self)
        guard let apikey = params.apikey, Self.syncApiKeys.contains(where: {$0 == apikey}) else {
            throw SyncError.invalidApiKey
        }
        
        let path = params.prefix.sanitisedPath
        guard params.list_type == 2, params.delimiter == "/",
              path.last == "/", path.starts(with: "data/") else {
            throw Abort(.forbidden)
        }
        
        let pathNoData = path[path.index(path.startIndex, offsetBy: 5)..<path.endIndex]
        let pathUrl = URL(fileURLWithPath: "\(OpenMeteo.dataDirectory)\(pathNoData)", isDirectory: true)
        let resourceKeys = Set<URLResourceKey>([.nameKey, .isDirectoryKey, .contentModificationDateKey, .fileSizeKey])
        
        guard let directoryEnumerator = FileManager.default.enumerator(at: pathUrl, includingPropertiesForKeys: Array(resourceKeys), options: [.skipsHiddenFiles, .skipsSubdirectoryDescendants]) else {
            throw Abort(.forbidden)
        }
        
        var files = [S3ListV2File]()
        var directories = [String]()
        for case let fileURL as URL in directoryEnumerator {
            guard let resourceValues = try? fileURL.resourceValues(forKeys: resourceKeys),
                  let isDirectory = resourceValues.isDirectory,
                  let name = resourceValues.name,
                  !name.contains("~")
            else {
                continue
            }
            if isDirectory {
                directories.append(name)
            } else {
                guard let modificationTime = resourceValues.contentModificationDate,
                      let fileSize = resourceValues.fileSize
                else {
                    continue
                }
                files.append(S3ListV2File(name: name, modificationTime: modificationTime, fileSize: fileSize))
            }
            
        }
        
        let dateFormat = DateFormatter.awsS3DateTime
        let filesXml = files.map {
            """
            <Contents>
                <Key>\(path)\($0.name)</Key>
                <LastModified>\(dateFormat.string(from: $0.modificationTime))</LastModified>
                <Size>\($0.fileSize)</Size>
                <StorageClass>STANDARD</StorageClass>
            </Contents>
            """
        }.joined(separator: "\n")
        let directoriesXml = directories.map {
            """
            <CommonPrefixes>
            <Prefix>\(path)\($0)/</Prefix>
            </CommonPrefixes>
            """
        }.joined(separator: "\n")
        
        var headers = HTTPHeaders()
        headers.add(name: .contentType, value: "application/xml")
        return Response(status: .ok, headers: headers, body: .init(string: """
        <?xml version="1.0" encoding="UTF-8"?>
        <ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
            <Name>openmeteo</Name>
            <Prefix>\(path)</Prefix>
            <KeyCount>\(files.count + directories.count)</KeyCount>
            <MaxKeys>1000</MaxKeys>
            <Delimiter>/</Delimiter>
            <IsTruncated>false</IsTruncated>
            \(directoriesXml)
            \(filesXml)
        </ListBucketResult>
        """))
    }
    
    /// Serve file through nginx send file
    func get(_ req: Request) async throws -> Response {
        let params = try req.query.decode(DownloadParams.self)
        let path = req.url.path.sanitisedPath
        let isJson = path.hasSuffix(".json")
        if !isJson {
            /// Only require API keys for non-json calls
            guard let apikey = params.apikey, Self.syncApiKeys.contains(where: {$0 == apikey}) else {
                throw SyncError.invalidApiKey
            }
        }
        
        guard path.last != "/", path.starts(with: "/data/") else {
            throw Abort(.forbidden)
        }
        let pathNoData = path[path.index(path.startIndex, offsetBy: 6)..<path.endIndex]
        
        if let nginxSendfilePrefix = Self.nginxSendfilePrefix {
            let response = Response()
            //let response = req.fileio.streamFile(at: abspath)
            response.headers.add(name: "X-Accel-Redirect", value: "/\(nginxSendfilePrefix)/\(pathNoData)")
            if let rate = params.rate {
                // Bytes per second download speed limit
                response.headers.add(name: "X-Accel-Limit-Rate", value: "\((rate)*1024*1024)")
            }
            return response
        }
        let response = req.fileio.streamFile(at: "\(OpenMeteo.dataDirectory)\(pathNoData)")
        return response
    }
}


enum SyncError: AbortError {
    case invalidApiKey
    
    var status: NIOHTTP1.HTTPResponseStatus {
        switch self {
        case .invalidApiKey:
            return .unauthorized
        }
    }
}

fileprivate extension String {
    static var sanitisedPathCharacterSet = CharacterSet(charactersIn: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-_/").inverted
    
    /// Allow only alpha numerics, dash, underscore and slash
    var sanitisedPath: String {
        return trimmingCharacters(in: Self.sanitisedPathCharacterSet)
    }
}

extension DateFormatter {
    /// Format dates like `2023-11-14T04:32:17.000Z`
    static var awsS3DateTime = {
        let dateFormat = DateFormatter()
        dateFormat.dateFormat = "y-MM-dd'T'HH:mm:ss.SSS'Z'"
        return dateFormat
    }()
}