| import AVFoundation |
| import OpenClawKit |
| import Foundation |
|
|
| actor CameraController { |
| struct CameraDeviceInfo: Codable, Sendable { |
| var id: String |
| var name: String |
| var position: String |
| var deviceType: String |
| } |
|
|
| enum CameraError: LocalizedError, Sendable { |
| case cameraUnavailable |
| case microphoneUnavailable |
| case permissionDenied(kind: String) |
| case invalidParams(String) |
| case captureFailed(String) |
| case exportFailed(String) |
|
|
| var errorDescription: String? { |
| switch self { |
| case .cameraUnavailable: |
| "Camera unavailable" |
| case .microphoneUnavailable: |
| "Microphone unavailable" |
| case let .permissionDenied(kind): |
| "\(kind) permission denied" |
| case let .invalidParams(msg): |
| msg |
| case let .captureFailed(msg): |
| msg |
| case let .exportFailed(msg): |
| msg |
| } |
| } |
| } |
|
|
| func snap(params: OpenClawCameraSnapParams) async throws -> ( |
| format: String, |
| base64: String, |
| width: Int, |
| height: Int) |
| { |
| let facing = params.facing ?? .front |
| let format = params.format ?? .jpg |
| |
| |
| let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600 |
| let quality = Self.clampQuality(params.quality) |
| let delayMs = max(0, params.delayMs ?? 0) |
|
|
| try await self.ensureAccess(for: .video) |
|
|
| let session = AVCaptureSession() |
| session.sessionPreset = .photo |
|
|
| guard let device = Self.pickCamera(facing: facing, deviceId: params.deviceId) else { |
| throw CameraError.cameraUnavailable |
| } |
|
|
| let input = try AVCaptureDeviceInput(device: device) |
| guard session.canAddInput(input) else { |
| throw CameraError.captureFailed("Failed to add camera input") |
| } |
| session.addInput(input) |
|
|
| let output = AVCapturePhotoOutput() |
| guard session.canAddOutput(output) else { |
| throw CameraError.captureFailed("Failed to add photo output") |
| } |
| session.addOutput(output) |
| output.maxPhotoQualityPrioritization = .quality |
|
|
| session.startRunning() |
| defer { session.stopRunning() } |
| await Self.warmUpCaptureSession() |
| await Self.sleepDelayMs(delayMs) |
|
|
| let settings: AVCapturePhotoSettings = { |
| if output.availablePhotoCodecTypes.contains(.jpeg) { |
| return AVCapturePhotoSettings(format: [AVVideoCodecKey: AVVideoCodecType.jpeg]) |
| } |
| return AVCapturePhotoSettings() |
| }() |
| settings.photoQualityPrioritization = .quality |
|
|
| var delegate: PhotoCaptureDelegate? |
| let rawData: Data = try await withCheckedThrowingContinuation { cont in |
| let d = PhotoCaptureDelegate(cont) |
| delegate = d |
| output.capturePhoto(with: settings, delegate: d) |
| } |
| withExtendedLifetime(delegate) {} |
|
|
| let maxPayloadBytes = 5 * 1024 * 1024 |
| |
| let maxEncodedBytes = (maxPayloadBytes / 4) * 3 |
| let res = try JPEGTranscoder.transcodeToJPEG( |
| imageData: rawData, |
| maxWidthPx: maxWidth, |
| quality: quality, |
| maxBytes: maxEncodedBytes) |
|
|
| return ( |
| format: format.rawValue, |
| base64: res.data.base64EncodedString(), |
| width: res.widthPx, |
| height: res.heightPx) |
| } |
|
|
| func clip(params: OpenClawCameraClipParams) async throws -> ( |
| format: String, |
| base64: String, |
| durationMs: Int, |
| hasAudio: Bool) |
| { |
| let facing = params.facing ?? .front |
| let durationMs = Self.clampDurationMs(params.durationMs) |
| let includeAudio = params.includeAudio ?? true |
| let format = params.format ?? .mp4 |
|
|
| try await self.ensureAccess(for: .video) |
| if includeAudio { |
| try await self.ensureAccess(for: .audio) |
| } |
|
|
| let session = AVCaptureSession() |
| session.sessionPreset = .high |
|
|
| guard let camera = Self.pickCamera(facing: facing, deviceId: params.deviceId) else { |
| throw CameraError.cameraUnavailable |
| } |
| let cameraInput = try AVCaptureDeviceInput(device: camera) |
| guard session.canAddInput(cameraInput) else { |
| throw CameraError.captureFailed("Failed to add camera input") |
| } |
| session.addInput(cameraInput) |
|
|
| if includeAudio { |
| guard let mic = AVCaptureDevice.default(for: .audio) else { |
| throw CameraError.microphoneUnavailable |
| } |
| let micInput = try AVCaptureDeviceInput(device: mic) |
| if session.canAddInput(micInput) { |
| session.addInput(micInput) |
| } else { |
| throw CameraError.captureFailed("Failed to add microphone input") |
| } |
| } |
|
|
| let output = AVCaptureMovieFileOutput() |
| guard session.canAddOutput(output) else { |
| throw CameraError.captureFailed("Failed to add movie output") |
| } |
| session.addOutput(output) |
| output.maxRecordedDuration = CMTime(value: Int64(durationMs), timescale: 1000) |
|
|
| session.startRunning() |
| defer { session.stopRunning() } |
| await Self.warmUpCaptureSession() |
|
|
| let movURL = FileManager().temporaryDirectory |
| .appendingPathComponent("openclaw-camera-\(UUID().uuidString).mov") |
| let mp4URL = FileManager().temporaryDirectory |
| .appendingPathComponent("openclaw-camera-\(UUID().uuidString).mp4") |
|
|
| defer { |
| try? FileManager().removeItem(at: movURL) |
| try? FileManager().removeItem(at: mp4URL) |
| } |
|
|
| var delegate: MovieFileDelegate? |
| let recordedURL: URL = try await withCheckedThrowingContinuation { cont in |
| let d = MovieFileDelegate(cont) |
| delegate = d |
| output.startRecording(to: movURL, recordingDelegate: d) |
| } |
| withExtendedLifetime(delegate) {} |
|
|
| |
| try await Self.exportToMP4(inputURL: recordedURL, outputURL: mp4URL) |
|
|
| let data = try Data(contentsOf: mp4URL) |
| return ( |
| format: format.rawValue, |
| base64: data.base64EncodedString(), |
| durationMs: durationMs, |
| hasAudio: includeAudio) |
| } |
|
|
| func listDevices() -> [CameraDeviceInfo] { |
| return Self.discoverVideoDevices().map { device in |
| CameraDeviceInfo( |
| id: device.uniqueID, |
| name: device.localizedName, |
| position: Self.positionLabel(device.position), |
| deviceType: device.deviceType.rawValue) |
| } |
| } |
|
|
| private func ensureAccess(for mediaType: AVMediaType) async throws { |
| let status = AVCaptureDevice.authorizationStatus(for: mediaType) |
| switch status { |
| case .authorized: |
| return |
| case .notDetermined: |
| let ok = await withCheckedContinuation(isolation: nil) { cont in |
| AVCaptureDevice.requestAccess(for: mediaType) { granted in |
| cont.resume(returning: granted) |
| } |
| } |
| if !ok { |
| throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") |
| } |
| case .denied, .restricted: |
| throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") |
| @unknown default: |
| throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") |
| } |
| } |
|
|
| private nonisolated static func pickCamera( |
| facing: OpenClawCameraFacing, |
| deviceId: String?) -> AVCaptureDevice? |
| { |
| if let deviceId, !deviceId.isEmpty { |
| if let match = Self.discoverVideoDevices().first(where: { $0.uniqueID == deviceId }) { |
| return match |
| } |
| } |
| let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back |
| if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) { |
| return device |
| } |
| |
| return AVCaptureDevice.default(for: .video) |
| } |
|
|
| private nonisolated static func positionLabel(_ position: AVCaptureDevice.Position) -> String { |
| switch position { |
| case .front: "front" |
| case .back: "back" |
| default: "unspecified" |
| } |
| } |
|
|
| private nonisolated static func discoverVideoDevices() -> [AVCaptureDevice] { |
| let types: [AVCaptureDevice.DeviceType] = [ |
| .builtInWideAngleCamera, |
| .builtInUltraWideCamera, |
| .builtInTelephotoCamera, |
| .builtInDualCamera, |
| .builtInDualWideCamera, |
| .builtInTripleCamera, |
| .builtInTrueDepthCamera, |
| .builtInLiDARDepthCamera, |
| ] |
| let session = AVCaptureDevice.DiscoverySession( |
| deviceTypes: types, |
| mediaType: .video, |
| position: .unspecified) |
| return session.devices |
| } |
|
|
| nonisolated static func clampQuality(_ quality: Double?) -> Double { |
| let q = quality ?? 0.9 |
| return min(1.0, max(0.05, q)) |
| } |
|
|
| nonisolated static func clampDurationMs(_ ms: Int?) -> Int { |
| let v = ms ?? 3000 |
| |
| return min(60000, max(250, v)) |
| } |
|
|
| private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws { |
| let asset = AVURLAsset(url: inputURL) |
| guard let exporter = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetMediumQuality) else { |
| throw CameraError.exportFailed("Failed to create export session") |
| } |
| exporter.shouldOptimizeForNetworkUse = true |
|
|
| if #available(iOS 18.0, tvOS 18.0, visionOS 2.0, *) { |
| do { |
| try await exporter.export(to: outputURL, as: .mp4) |
| return |
| } catch { |
| throw CameraError.exportFailed(error.localizedDescription) |
| } |
| } else { |
| exporter.outputURL = outputURL |
| exporter.outputFileType = .mp4 |
|
|
| try await withCheckedThrowingContinuation(isolation: nil) { (cont: CheckedContinuation<Void, Error>) in |
| exporter.exportAsynchronously { |
| cont.resume(returning: ()) |
| } |
| } |
|
|
| switch exporter.status { |
| case .completed: |
| return |
| case .failed: |
| throw CameraError.exportFailed(exporter.error?.localizedDescription ?? "export failed") |
| case .cancelled: |
| throw CameraError.exportFailed("export cancelled") |
| default: |
| throw CameraError.exportFailed("export did not complete") |
| } |
| } |
| } |
|
|
| private nonisolated static func warmUpCaptureSession() async { |
| |
| try? await Task.sleep(nanoseconds: 150_000_000) |
| } |
|
|
| private nonisolated static func sleepDelayMs(_ delayMs: Int) async { |
| guard delayMs > 0 else { return } |
| let maxDelayMs = 10 * 1000 |
| let ns = UInt64(min(delayMs, maxDelayMs)) * UInt64(NSEC_PER_MSEC) |
| try? await Task.sleep(nanoseconds: ns) |
| } |
| } |
|
|
| private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate { |
| private let continuation: CheckedContinuation<Data, Error> |
| private var didResume = false |
|
|
| init(_ continuation: CheckedContinuation<Data, Error>) { |
| self.continuation = continuation |
| } |
|
|
| func photoOutput( |
| _ output: AVCapturePhotoOutput, |
| didFinishProcessingPhoto photo: AVCapturePhoto, |
| error: Error?) |
| { |
| guard !self.didResume else { return } |
| self.didResume = true |
|
|
| if let error { |
| self.continuation.resume(throwing: error) |
| return |
| } |
| guard let data = photo.fileDataRepresentation() else { |
| self.continuation.resume( |
| throwing: NSError(domain: "Camera", code: 1, userInfo: [ |
| NSLocalizedDescriptionKey: "photo data missing", |
| ])) |
| return |
| } |
| if data.isEmpty { |
| self.continuation.resume( |
| throwing: NSError(domain: "Camera", code: 2, userInfo: [ |
| NSLocalizedDescriptionKey: "photo data empty", |
| ])) |
| return |
| } |
| self.continuation.resume(returning: data) |
| } |
|
|
| func photoOutput( |
| _ output: AVCapturePhotoOutput, |
| didFinishCaptureFor resolvedSettings: AVCaptureResolvedPhotoSettings, |
| error: Error?) |
| { |
| guard let error else { return } |
| guard !self.didResume else { return } |
| self.didResume = true |
| self.continuation.resume(throwing: error) |
| } |
| } |
|
|
| private final class MovieFileDelegate: NSObject, AVCaptureFileOutputRecordingDelegate { |
| private let continuation: CheckedContinuation<URL, Error> |
| private var didResume = false |
|
|
| init(_ continuation: CheckedContinuation<URL, Error>) { |
| self.continuation = continuation |
| } |
|
|
| func fileOutput( |
| _ output: AVCaptureFileOutput, |
| didFinishRecordingTo outputFileURL: URL, |
| from connections: [AVCaptureConnection], |
| error: Error?) |
| { |
| guard !self.didResume else { return } |
| self.didResume = true |
|
|
| if let error { |
| let ns = error as NSError |
| if ns.domain == AVFoundationErrorDomain, |
| ns.code == AVError.maximumDurationReached.rawValue |
| { |
| self.continuation.resume(returning: outputFileURL) |
| return |
| } |
| self.continuation.resume(throwing: error) |
| return |
| } |
| self.continuation.resume(returning: outputFileURL) |
| } |
| } |
|
|