melbot / Swabble /Sources /SwabbleCore /Support /AttributedString+Sentences.swift
amos-fernandes's picture
Upload 4501 files
3a65265 verified
import CoreMedia
import Foundation
import NaturalLanguage
extension AttributedString {
public func sentences(maxLength: Int? = nil) -> [AttributedString] {
let tokenizer = NLTokenizer(unit: .sentence)
let string = String(characters)
tokenizer.string = string
let sentenceRanges = tokenizer.tokens(for: string.startIndex..<string.endIndex).map {
(
$0,
AttributedString.Index($0.lowerBound, within: self)!
..<
AttributedString.Index($0.upperBound, within: self)!)
}
let ranges = sentenceRanges.flatMap { sentenceStringRange, sentenceRange in
let sentence = self[sentenceRange]
guard let maxLength, sentence.characters.count > maxLength else {
return [sentenceRange]
}
let wordTokenizer = NLTokenizer(unit: .word)
wordTokenizer.string = string
var wordRanges = wordTokenizer.tokens(for: sentenceStringRange).map {
AttributedString.Index($0.lowerBound, within: self)!
..<
AttributedString.Index($0.upperBound, within: self)!
}
guard !wordRanges.isEmpty else { return [sentenceRange] }
wordRanges[0] = sentenceRange.lowerBound..<wordRanges[0].upperBound
wordRanges[wordRanges.count - 1] = wordRanges[wordRanges.count - 1].lowerBound..<sentenceRange.upperBound
var ranges: [Range<AttributedString.Index>] = []
for wordRange in wordRanges {
if let lastRange = ranges.last,
self[lastRange].characters.count + self[wordRange].characters.count <= maxLength {
ranges[ranges.count - 1] = lastRange.lowerBound..<wordRange.upperBound
} else {
ranges.append(wordRange)
}
}
return ranges
}
return ranges.compactMap { range in
let audioTimeRanges = self[range].runs.filter {
!String(self[$0.range].characters)
.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
}.compactMap(\.audioTimeRange)
guard !audioTimeRanges.isEmpty else { return nil }
let start = audioTimeRanges.first!.start
let end = audioTimeRanges.last!.end
var attributes = AttributeContainer()
attributes[AttributeScopes.SpeechAttributes.TimeRangeAttribute.self] = CMTimeRange(
start: start,
end: end)
return AttributedString(self[range].characters, attributes: attributes)
}
}
}