|
|
#include <ruby.h> |
|
|
#include "ruby_whisper.h" |
|
|
#include "common-whisper.h" |
|
|
#include <string> |
|
|
#include <vector> |
|
|
|
|
|
#ifdef __cplusplus |
|
|
extern "C" { |
|
|
#endif |
|
|
|
|
|
extern const rb_data_type_t ruby_whisper_type; |
|
|
extern const rb_data_type_t ruby_whisper_params_type; |
|
|
|
|
|
extern ID id_to_s; |
|
|
extern ID id_call; |
|
|
extern ID transcribe_option_names[1]; |
|
|
|
|
|
extern void |
|
|
prepare_transcription(ruby_whisper_params * rwp, VALUE * self); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Transcribes an audio file with the whisper context wrapped by self.
 *
 * Ruby-level arguments (parsed with the "2:&" spec):
 *   wave_file_path  path of the audio file to load
 *   params          object wrapping a ruby_whisper_params struct
 *   keyword option  the single entry of transcribe_option_names, read as the
 *                   number of processors; defaults to 1 when omitted
 *   block           optional; when given it is called once with one String
 *                   holding the text of every segment concatenated in order
 *
 * Returns self. Failure to read the audio file or to process the audio is
 * reported on stderr only and still returns self.
 *
 * Raises RuntimeError when wave_file_path does not respond to to_s, and
 * ArgumentError when the processor count is smaller than 1. The Ruby
 * conversion helpers used below (NUM2INT, StringValueCStr, the typed-data
 * unwrapping) can raise as well.
 */
VALUE
ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
  ruby_whisper *rw;
  ruby_whisper_params *rwp;
  VALUE wave_file_path, blk, params, kws;
  VALUE opts[1];

  rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "2:&", &wave_file_path, &params, &kws, &blk);
  rb_get_kwargs(kws, transcribe_option_names, 0, 1, opts);

  const int n_processors = opts[0] == Qundef ? 1 : NUM2INT(opts[0]);
  // Reject nonsensical counts here instead of handing them to
  // whisper_full_parallel, which is not expected to cope with values < 1.
  if (n_processors < 1) {
    rb_raise(rb_eArgError, "number of processors must be positive (got %d)", n_processors);
  }

  GetContext(self, rw);
  TypedData_Get_Struct(params, ruby_whisper_params, &ruby_whisper_params_type, rwp);

  // NOTE(review): nearly every Ruby object responds to to_s, while
  // StringValueCStr below needs a String (or to_str) - confirm this check
  // expresses the intended contract.
  if (!rb_respond_to(wave_file_path, id_to_s)) {
    rb_raise(rb_eRuntimeError, "Expected file path to wave file");
  }

  // Everything above may leave this function through a Ruby exception
  // (longjmp), so no C++ object with a destructor is created before this point.
  const char *path_cstr = StringValueCStr(wave_file_path);

  {
    // The path copy and the decoded audio live only inside this scope, so they
    // are destroyed before the Ruby calls further down that can raise or run
    // arbitrary user code (string allocation, the caller's block).
    const std::string fname_inp = path_cstr;
    std::vector<float> pcmf32;
    std::vector<std::vector<float>> pcmf32s;

    if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
      fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
      return self;
    }

    // NOTE(review): prepare_transcription and any callbacks run during
    // whisper_full_parallel are defined elsewhere; if they can raise, the
    // buffers above would still be skipped - confirm.
    prepare_transcription(rwp, &self);

    const int n_samples = static_cast<int>(pcmf32.size());
    if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), n_samples, n_processors) != 0) {
      fprintf(stderr, "failed to process audio\n");
      return self;
    }
  }

  // Without a block there is nobody to hand the text to.
  if (NIL_P(blk)) {
    return self;
  }

  // NOTE(review): rb_str_new2 does not tag the string as UTF-8 - confirm the
  // encoding callers expect for the yielded text.
  const int n_segments = whisper_full_n_segments(rw->context);
  VALUE output = rb_str_new2("");
  for (int i = 0; i < n_segments; ++i) {
    const char * text = whisper_full_get_segment_text(rw->context, i);
    output = rb_str_concat(output, rb_str_new2(text));
  }

  rb_funcall(blk, id_call, 1, output);
  return self;
}
|
|
#ifdef __cplusplus |
|
|
} |
|
|
#endif |
|
|
|