Spaces:
Paused
Paused
File size: 2,713 Bytes
4fc4790 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | import CoreMedia
import Foundation
import NaturalLanguage
extension AttributedString {
public func sentences(maxLength: Int? = nil) -> [AttributedString] {
let tokenizer = NLTokenizer(unit: .sentence)
let string = String(characters)
tokenizer.string = string
let sentenceRanges = tokenizer.tokens(for: string.startIndex..<string.endIndex).map {
(
$0,
AttributedString.Index($0.lowerBound, within: self)!
..<
AttributedString.Index($0.upperBound, within: self)!)
}
let ranges = sentenceRanges.flatMap { sentenceStringRange, sentenceRange in
let sentence = self[sentenceRange]
guard let maxLength, sentence.characters.count > maxLength else {
return [sentenceRange]
}
let wordTokenizer = NLTokenizer(unit: .word)
wordTokenizer.string = string
var wordRanges = wordTokenizer.tokens(for: sentenceStringRange).map {
AttributedString.Index($0.lowerBound, within: self)!
..<
AttributedString.Index($0.upperBound, within: self)!
}
guard !wordRanges.isEmpty else { return [sentenceRange] }
wordRanges[0] = sentenceRange.lowerBound..<wordRanges[0].upperBound
wordRanges[wordRanges.count - 1] = wordRanges[wordRanges.count - 1].lowerBound..<sentenceRange.upperBound
var ranges: [Range<AttributedString.Index>] = []
for wordRange in wordRanges {
if let lastRange = ranges.last,
self[lastRange].characters.count + self[wordRange].characters.count <= maxLength {
ranges[ranges.count - 1] = lastRange.lowerBound..<wordRange.upperBound
} else {
ranges.append(wordRange)
}
}
return ranges
}
return ranges.compactMap { range in
let audioTimeRanges = self[range].runs.filter {
!String(self[$0.range].characters)
.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
}.compactMap(\.audioTimeRange)
guard !audioTimeRanges.isEmpty else { return nil }
let start = audioTimeRanges.first!.start
let end = audioTimeRanges.last!.end
var attributes = AttributeContainer()
attributes[AttributeScopes.SpeechAttributes.TimeRangeAttribute.self] = CMTimeRange(
start: start,
end: end)
return AttributedString(self[range].characters, attributes: attributes)
}
}
}
|