Spaces:
Sleeping
Sleeping
| --[[ | |
| Transcript.lua - Speech transcription data model | |
| ]]-- | |
| Transcript = Polo { | |
| COLUMN_ORDER = {"id", "seek", "start", "end", "text", "score", "file"}, | |
| DEFAULT_HIDE = { | |
| seek = true, temperature = true, tokens = true, avg_logprob = true, | |
| compression_ratio = true, no_speech_prob = true | |
| }, | |
| init = function(self) | |
| self:clear() | |
| end | |
| } | |
| Transcript.calculate_offset = function (item, take) | |
| return ( | |
| reaper.GetMediaItemInfo_Value(item, 'D_POSITION') | |
| - reaper.GetMediaItemTakeInfo_Value(take, 'D_STARTOFFS')) | |
| end | |
| function Transcript:clear() | |
| self.init_data = {} | |
| self.filtered_data = {} | |
| self.data = {} | |
| self.search = '' | |
| end | |
| function Transcript:get_columns() | |
| if #self.init_data > 0 then | |
| local columns = {"score"} | |
| local row = self.init_data[1] | |
| for k, _ in pairs(row.data) do | |
| if k:sub(1, 1) ~= '_' then | |
| table.insert(columns, k) | |
| end | |
| end | |
| return self:_sort_columns(columns) | |
| end | |
| return {} | |
| end | |
| function Transcript:_sort_columns(columns) | |
| local order = self.COLUMN_ORDER | |
| local column_set = {} | |
| local extra_columns = {} | |
| local order_set = {} | |
| local result = {} | |
| for _, column in pairs(columns) do | |
| column_set[column] = true | |
| end | |
| for _, column in pairs(order) do | |
| order_set[column] = true | |
| if column_set[column] then | |
| table.insert(result, column) | |
| end | |
| end | |
| for _, column in pairs(columns) do | |
| if not order_set[column] then | |
| table.insert(extra_columns, column) | |
| end | |
| end | |
| table.sort(extra_columns) | |
| for _, column in pairs(extra_columns) do | |
| table.insert(result, column) | |
| end | |
| return result | |
| end | |
| function Transcript:add_segment(segment) | |
| table.insert(self.init_data, segment) | |
| end | |
| function Transcript:has_segments() | |
| return #self.init_data > 0 | |
| end | |
| function Transcript:get_segments() | |
| return self.data | |
| end | |
| function Transcript:sort(column, ascending) | |
| self.data = {table.unpack(self.filtered_data)} | |
| table.sort(self.data, function (a, b) | |
| local a_val, b_val = a:get(column), b:get(column) | |
| if a_val == nil then a_val = '' end | |
| if b_val == nil then b_val = '' end | |
| if not ascending then | |
| a_val, b_val = b_val, a_val | |
| end | |
| return a_val < b_val | |
| end) | |
| end | |
| function Transcript:to_table() | |
| local segments = {} | |
| for _, segment in pairs(self.data) do | |
| table.insert(segments, segment:to_table()) | |
| end | |
| return {segments = segments} | |
| end | |
| function Transcript:to_json() | |
| return json.encode(self:to_table()) | |
| end | |
| function Transcript:update() | |
| if #self.init_data == 0 then | |
| self:clear() | |
| return | |
| end | |
| local columns = self:get_columns() | |
| if #self.search > 0 then | |
| local search = self.search | |
| local search_lower = search:lower() | |
| local match_case = (search ~= search_lower) | |
| self.filtered_data = {} | |
| for _, segment in pairs(self.init_data) do | |
| local matching = false | |
| for _, column in pairs(columns) do | |
| if match_case then | |
| if tostring(segment.data[column]):find(search) then | |
| matching = true | |
| break | |
| end | |
| else | |
| if tostring(segment.data[column]):lower():find(search_lower) then | |
| matching = true | |
| break | |
| end | |
| end | |
| end | |
| if matching then | |
| table.insert(self.filtered_data, segment) | |
| end | |
| end | |
| else | |
| self.filtered_data = self.init_data | |
| end | |
| self.data = self.filtered_data | |
| end | |
| function Transcript:create_markers(proj, regions, words) | |
| proj = proj or 0 | |
| regions = regions or false | |
| for i, segment in pairs(self.data) do | |
| local offset = self.calculate_offset(segment.item, segment.take) | |
| local want_index = segment:get('id', i) | |
| local color = 0 | |
| if words then | |
| for _, word in pairs(segment.words) do | |
| local start = word.start + offset | |
| local end_ = word.end_ + offset | |
| local name = word.word | |
| reaper.AddProjectMarker2(proj, regions, start, end_, name, want_index, color) | |
| end | |
| else | |
| local start = segment.start + offset | |
| local end_ = segment.end_ + offset | |
| local name = segment.text | |
| reaper.AddProjectMarker2(proj, regions, start, end_, name, want_index, color) | |
| end | |
| end | |
| end | |
| function Transcript:create_notes_track(words) | |
| local cur_pos = reaper.GetCursorPosition() | |
| local index = 0 | |
| reaper.InsertTrackAtIndex(index, false) | |
| local track = reaper.GetTrack(0, index) | |
| reaper.SetOnlyTrackSelected(track) | |
| reaper.GetSetMediaTrackInfo_String(track, 'P_NAME', 'Speech', true) | |
| for _, segment in pairs(self.data) do | |
| local offset = self.calculate_offset(segment.item, segment.take) | |
| if words then | |
| for _, word in pairs(segment.words) do | |
| local start = word.start + offset | |
| local end_ = word.end_ + offset | |
| local text = word.word | |
| self:_create_note(start, end_, text, false) | |
| end | |
| else | |
| local start = segment.start + offset | |
| local end_ = segment.end_ + offset | |
| local text = segment.text | |
| self:_create_note(start, end_, text, true) | |
| end | |
| end | |
| reaper.SetEditCurPos(cur_pos, true, true) | |
| end | |
| function Transcript:_create_note(start, end_, text, stretch) | |
| local item = self:_create_empty_item(start, end_) | |
| self:_set_note_text(item, text, stretch) | |
| end | |
| function Transcript:_create_empty_item(start, end_) | |
| self:_insert_empty_item() | |
| local item = reaper.GetSelectedMediaItem(0, 0) | |
| reaper.SelectAllMediaItems(0, false) | |
| reaper.SetMediaItemPosition(item, start, true) | |
| reaper.SetMediaItemLength(item, end_ - start, true) | |
| return item | |
| end | |
| function Transcript:_insert_empty_item() | |
| reaper.Main_OnCommand(40142, 0) | |
| end | |
| function Transcript:_set_note_text(item, text, stretch) | |
| local _, chunk = reaper.GetItemStateChunk(item, "", false) | |
| local notes_chunk = ("<NOTES\n|%s\n>\n"):format(text:match("^%s*(.-)%s*$")) | |
| local flags_chunk = (stretch and "IMGRESOURCEFLAGS 11\n" or "") | |
| chunk = chunk:gsub('>', notes_chunk:gsub('%%', '%%%%') .. flags_chunk .. '>') | |
| reaper.SetItemStateChunk(item, chunk, false) | |
| end | |
| TranscriptSegment = Polo { | |
| _proxy_fields = { | |
| start = 'start', | |
| end_ = 'end', | |
| text = 'text', | |
| } | |
| } | |
| TranscriptSegment.__index = function(o, key) | |
| local proxy_target = TranscriptSegment._proxy_fields[key] | |
| if proxy_target then | |
| return o.data[proxy_target] | |
| else | |
| return TranscriptSegment[key] | |
| end | |
| end | |
| function TranscriptSegment:init() | |
| assert(self.data, 'missing data') | |
| assert(self.item, 'missing item') | |
| assert(self.take, 'missing take') | |
| self.data = self._copy(self.data) | |
| self.data['file'] = self:get_file() | |
| end | |
| TranscriptSegment.from_whisper = function(segment, item, take) | |
| local result = {} | |
| local words = segment.words | |
| segment = TranscriptSegment._copy(segment) | |
| segment.text = segment.text:match("^%s*(.-)%s*$") | |
| segment.words = nil | |
| if words then | |
| local transcript_words = {} | |
| for _, word in pairs(words) do | |
| local transcript_word = TranscriptWord.new({ | |
| word = word.word:match("^%s*(.-)%s*$"), | |
| probability = word.probability, | |
| start = word.start, | |
| end_ = word['end'] | |
| }) | |
| table.insert(transcript_words, transcript_word) | |
| end | |
| table.insert(result, TranscriptSegment.new({ | |
| data = segment, | |
| item = item, | |
| take = take, | |
| words = transcript_words | |
| })) | |
| else | |
| table.insert(result, TranscriptSegment.new({ | |
| data = segment, | |
| item = item, | |
| take = take | |
| })) | |
| end | |
| return result | |
| end | |
| TranscriptSegment.default_hide = function(column) | |
| return Transcript.DEFAULT_HIDE[column] or false | |
| end | |
| TranscriptSegment.merge_words = function(words, index1, index2) | |
| local word1 = words[index1] | |
| local word2 = words[index2] | |
| local new_word = TranscriptWord.new { | |
| word = word1.word .. word2.word, | |
| start = word1.start, | |
| end_ = word2.end_, | |
| probability = (word1.probability + word2.probability) / 2 | |
| } | |
| table.remove(words, index2) | |
| table.remove(words, index1) | |
| table.insert(words, index1, new_word) | |
| end | |
| TranscriptSegment.split_word = function(words, index) | |
| local word = words[index] | |
| local length = utf8.len(word.word) | |
| local half_length = math.floor(length / 2) | |
| local new_word1 = TranscriptWord.new { | |
| word = word.word:sub(1, utf8.offset(word.word, half_length)), | |
| start = word.start, | |
| end_ = word.start + (word.end_ - word.start) / 2, | |
| probability = word.probability | |
| } | |
| local new_word2 = TranscriptWord.new { | |
| word = word.word:sub(utf8.offset(word.word, half_length + 1)), | |
| start = word.start + (word.end_ - word.start) / 2, | |
| end_ = word.end_, | |
| probability = word.probability | |
| } | |
| table.remove(words, index) | |
| table.insert(words, index, new_word2) | |
| table.insert(words, index, new_word1) | |
| end | |
| TranscriptSegment._copy = function(data) | |
| local result = {} | |
| for k, v in pairs(data) do | |
| result[k] = v | |
| end | |
| return result | |
| end | |
| function TranscriptSegment:score() | |
| local score = 0.0 | |
| if self.words and #self.words > 0 then | |
| for _, word in pairs(self.words) do | |
| score = score + word:score() | |
| end | |
| return score / #self.words | |
| else | |
| return 0.0 | |
| end | |
| end | |
| function TranscriptSegment:get(column, default) | |
| if column == 'score' then | |
| return self:score() | |
| elseif self.data[column] then | |
| return self.data[column] | |
| else | |
| return default | |
| end | |
| end | |
| function TranscriptSegment:set_words(words) | |
| self.words = words | |
| self:update_text() | |
| end | |
| function TranscriptSegment:update_text() | |
| local text_chunks = {} | |
| for _, word in pairs(self.words) do | |
| table.insert(text_chunks, word.word) | |
| end | |
| self.data['text'] = table.concat(text_chunks, ' ') | |
| end | |
| function TranscriptSegment:get_file(include_extensions) | |
| include_extensions = include_extensions or false | |
| local file = '' | |
| app:trap(function () | |
| local source = reaper.GetMediaItemTake_Source(self.take) | |
| if source then | |
| local source_path = reaper.GetMediaSourceFileName(source) | |
| file = source_path:gsub(".*[\\/](.*)", "%1") | |
| if not include_extensions then | |
| file = file:gsub("(.*)[.].*", "%1") | |
| end | |
| end | |
| end) | |
| return file | |
| end | |
| function TranscriptSegment:get_file_with_extension() | |
| return self:get_file(true) | |
| end | |
| function TranscriptSegment:navigate(word_index, autoplay) | |
| local start = self.start | |
| if word_index then | |
| start = self.words[word_index].start | |
| end | |
| local offset = start - reaper.GetMediaItemTakeInfo_Value(self.take, 'D_STARTOFFS') | |
| self:_navigate_to_media_item(self.item) | |
| reaper.MoveEditCursor(offset, false) | |
| if autoplay and reaper.GetPlayState() & 1 == 0 then | |
| self:_transport_play() | |
| end | |
| if reaper.GetPlayState() & 1 == 1 then | |
| self:_transport_play() | |
| end | |
| end | |
| function TranscriptSegment:_navigate_to_media_item(item) | |
| reaper.SelectAllMediaItems(0, false) | |
| reaper.SetMediaItemSelected(item, true) | |
| self:_move_cursor_to_start_of_items() | |
| end | |
| function TranscriptSegment:_move_cursor_to_start_of_items() | |
| reaper.Main_OnCommand(41173, 0) | |
| end | |
| function TranscriptSegment:_transport_play() | |
| reaper.Main_OnCommand(1007, 0) | |
| end | |
| function TranscriptSegment:to_json() | |
| return json.encode(self:to_table()) | |
| end | |
| function TranscriptSegment:to_table() | |
| local result = self._copy(self.data) | |
| if self.words then | |
| result['words'] = {} | |
| for _, word in pairs(self.words) do | |
| table.insert(result['words'], word:to_table()) | |
| end | |
| end | |
| return result | |
| end | |
| function TranscriptSegment:select_in_timeline(offset) | |
| offset = offset or 0 | |
| local start = self.start + offset | |
| local end_ = self.end_ + offset | |
| reaper.GetSet_LoopTimeRange(true, true, start, end_, false) | |
| end | |
| TranscriptWord = Polo {} | |
| function TranscriptWord:init() | |
| assert(self.word, 'missing word') | |
| assert(self.start, 'missing start') | |
| assert(self.end_, 'missing end_') | |
| assert(self.probability, 'missing probability') | |
| end | |
| function TranscriptWord:copy() | |
| return TranscriptWord.new { | |
| word = self.word, | |
| start = self.start, | |
| end_ = self.end_, | |
| probability = self.probability, | |
| } | |
| end | |
| function TranscriptWord:score() | |
| return self.probability | |
| end | |
| function TranscriptWord:to_table() | |
| return { | |
| word = self.word, | |
| start = self.start, | |
| ['end'] = self.end_, | |
| probability = self.probability, | |
| } | |
| end | |
| function TranscriptWord:select_in_timeline(offset) | |
| offset = offset or 0 | |
| local start = self.start + offset | |
| local end_ = self.end_ + offset | |
| reaper.GetSet_LoopTimeRange(true, true, start, end_, false) | |
| end | |