// Payload types and small helpers for a Core ML command-line script.
//
// NOTE(review): this text appears to be damaged. Statements that would normally sit on
// separate lines share one line, each `0..` range is followed by unrelated tokens instead
// of an upper bound, and the usage string lists flags with no value placeholders.
// Presumably line breaks and every `<`…`>` span were lost somewhere upstream — TODO confirm
// and restore from version control before changing any code here.
//
// Declarations that are fully readable:
//   InputPayload  — Codable struct: prompt, prompt_ids, seq_len, max_new_tokens, steps,
//                   mask_token_id, eos_token_id, pad_token_id, and optional compute_units.
//   StepStat      — Codable struct: step, masked_before, fixed_this_step, masked_after,
//                   avg_fixed_score.
//   OutputPayload — Codable struct: prompt, prompt_ids, final_ids, generated_ids,
//                   generated_ids_untrimmed, prompt_len, total_len, step_stats,
//                   load_seconds, total_predict_seconds, loop_seconds.
//   argValue(_:)  — returns the command-line argument that follows the first occurrence
//                   of `name`; nil when `name` is absent or is the last argument.
//   usageAndExit() — writes the usage line to stderr and exits with status 2.
//   computeUnits(from:) — lowercases the value and maps "cpu"/"cpuonly" to .cpuOnly,
//                   "cpugpu"/"cpuandgpu" to .cpuAndGPU, "cpune"/"cpuandneuralengine" to
//                   .cpuAndNeuralEngine; nil or any other string yields .all.
//
// Partially readable:
//   int32Array(_:shape:) — creates an MLMultiArray with the given shape and the .int32
//                   data type; the rest of its body, and whatever followed it, cannot be
//                   read from this line.
import Foundation import CoreML struct InputPayload: Codable { let prompt: String let prompt_ids: [Int32] let seq_len: Int let max_new_tokens: Int let steps: Int let mask_token_id: Int32 let eos_token_id: Int32 let pad_token_id: Int32 let compute_units: String? } struct StepStat: Codable { let step: Int let masked_before: Int let fixed_this_step: Int let masked_after: Int let avg_fixed_score: Float } struct OutputPayload: Codable { let prompt: String let prompt_ids: [Int32] let final_ids: [Int32] let generated_ids: [Int32] let generated_ids_untrimmed: [Int32] let prompt_len: Int let total_len: Int let step_stats: [StepStat] let load_seconds: Double let total_predict_seconds: Double let loop_seconds: Double } func argValue(_ name: String) -> String? { let args = CommandLine.arguments guard let idx = args.firstIndex(of: name), idx + 1 < args.count else { return nil } return args[idx + 1] } func usageAndExit() -> Never { fputs("Usage: swift scripts/llada_diffuse.swift --model --input --output \n", stderr) exit(2) } func computeUnits(from value: String?) -> MLComputeUnits { guard let raw = value?.lowercased() else { return .all } switch raw { case "cpu", "cpuonly": return .cpuOnly case "cpugpu", "cpuandgpu": return .cpuAndGPU case "cpune", "cpuandneuralengine": return .cpuAndNeuralEngine default: return .all } } func int32Array(_ values: [Int32], shape: [NSNumber]) throws -> MLMultiArray { let arr = try MLMultiArray(shape: shape, dataType: .int32) for i in 0.. 0 { for i in 0.. 0 { for i in 0.. promptLen { for i in promptLen.. 
([Int32], [Float], Double) { let idsMA = try int32Array(ids, shape: [1, NSNumber(value: seqLen)]) let maskMA = try int32Array(mask, shape: [1, NSNumber(value: seqLen)]) let provider = try MLDictionaryFeatureProvider(dictionary: [ "input_ids": MLFeatureValue(multiArray: idsMA), "attention_mask": MLFeatureValue(multiArray: maskMA) ]) let t0 = Date() let out = try model.prediction(from: provider) let dt = Date().timeIntervalSince(t0) guard let predMA = out.featureValue(for: "var_4801")?.multiArrayValue, let scoreMA = out.featureValue(for: "var_4806")?.multiArrayValue else { throw NSError(domain: "llada_diffuse", code: 1, userInfo: [NSLocalizedDescriptionKey: "Model outputs var_4801/var_4806 not found"]) } var pred = Array(repeating: Int32(0), count: seqLen) var score = Array(repeating: Float(0), count: seqLen) for i in 0.. promptLen { for i in promptLen.. score[$1] } var scoreSum: Float = 0 if fixCount > 0 { for j in 0.. promptLen { for i in promptLen.. 0 ? scoreSum / Float(fixCount) : 0 stepStats.append(StepStat( step: step, masked_before: maskedPositions.count, fixed_this_step: fixCount, masked_after: maskedAfter, avg_fixed_score: avgScore )) } // Safety: if any generation positions are still masked, fill them from one final pass. var remainingPositions: [Int] = [] if totalLen > promptLen { for i in promptLen.. promptLen ? Array(tokenBuffer[promptLen..