|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <vector> |
|
|
#include <string> |
|
|
|
|
|
|
|
|
#include <stdio.h> |
|
|
#include <stdlib.h> |
|
|
#include <string.h> |
|
|
#include <stdbool.h> |
|
|
#include <stdint.h> |
|
|
#include <sys/types.h> |
|
|
#include <sys/stat.h> |
|
|
#include <fcntl.h> |
|
|
#include <unistd.h> |
|
|
#include <sys/mman.h> |
|
|
|
|
|
extern "C" { |
|
|
#include <libavutil/opt.h> |
|
|
#include <libavcodec/avcodec.h> |
|
|
#include <libavformat/avformat.h> |
|
|
#include <libswresample/swresample.h> |
|
|
} |
|
|
|
|
|
/* Short aliases for the <stdint.h> fixed-width integer types. */

/* Unsigned, 8 to 64 bits wide. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

/* Signed, 8 to 64 bits wide. */
typedef int8_t  s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;
|
|
|
|
|
/* Sample rate, in Hz, stored in the WAVE header and requested from the resampler. */
#define WAVE_SAMPLE_RATE 16000
/* Size, in bytes, of the buffer handed to avio_alloc_context(). */
#define AVIO_CTX_BUF_SZ 4096

/* Value of the FFMPEG_LOG environment variable, read once during static
 * initialisation; NULL when the variable is not set. */
static const char* ffmpegLog = getenv("FFMPEG_LOG");

/* printf-style diagnostics on stderr, emitted only when FFMPEG_LOG is set. */
#define LOG(...)                              \
    do {                                      \
        if (ffmpegLog)                        \
            fprintf(stderr, __VA_ARGS__);     \
    } while(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * In-memory image of the 44-byte RIFF/WAVE header that set_wave_hdr() fills
 * in and that is copied verbatim in front of the PCM samples.
 *
 * The struct is packed and uses fixed-width integer types so that its layout
 * does not depend on the platform's `int` size. The 32-bit fields stay signed
 * to remain compatible with existing users of the struct. Multi-byte fields
 * are stored in host byte order.
 */
struct wave_hdr {
    /* RIFF chunk descriptor */
    char    riff_header[4];    /* "RIFF" */
    int32_t wav_size;          /* payload size + sizeof(wave_hdr) - 8 */
    char    wav_header[4];     /* "WAVE" */

    /* Format sub-chunk */
    char    fmt_header[4];     /* "fmt " (note the trailing space) */
    int32_t fmt_chunk_size;    /* set_wave_hdr() stores 16 */
    int16_t audio_format;      /* set_wave_hdr() stores 1 */
    int16_t num_channels;
    int32_t sample_rate;       /* samples per second */
    int32_t byte_rate;         /* sample_rate * sample_alignment */
    int16_t sample_alignment;  /* bytes per sample frame */
    int16_t bit_depth;         /* bits per sample */

    /* Data sub-chunk */
    char    data_header[4];    /* "data" */
    int32_t data_bytes;        /* number of payload bytes following the header */
} __attribute__((__packed__));

/* The header is copied byte-for-byte, so its size must never drift. */
static_assert(sizeof(struct wave_hdr) == 44, "wave_hdr must be exactly 44 bytes");
|
|
|
|
|
/*
 * Read cursor over an in-memory input. read_packet() copies bytes out of
 * `ptr`, then advances `ptr` and shrinks `size` by the amount copied.
 */
struct audio_buffer {
    uint8_t *ptr;   /* next unread byte */
    int      size;  /* number of unread bytes starting at ptr */
};
|
|
|
|
|
/*
 * Fill `wh` with a WAVE header describing `size` bytes of mono, 16-bit
 * sample data at WAVE_SAMPLE_RATE.
 *
 * wh   - header to initialise; every field is overwritten.
 * size - number of sample-data bytes that will follow the header.
 */
static void set_wave_hdr(wave_hdr& wh, size_t size) {
    /* Four-character chunk tags; copied without a terminating NUL. */
    memcpy(&wh.riff_header, "RIFF", 4);
    memcpy(&wh.wav_header,  "WAVE", 4);
    memcpy(&wh.fmt_header,  "fmt ", 4);
    memcpy(&wh.data_header, "data", 4);

    /* Chunk sizes. wav_size excludes the first 8 bytes of the header
     * (riff_header and wav_size itself). */
    wh.data_bytes     = size;
    wh.wav_size       = size + sizeof(struct wave_hdr) - 8;
    wh.fmt_chunk_size = 16;

    /* Sample format: one channel of 16-bit samples, 2 bytes per frame. */
    wh.audio_format     = 1;
    wh.num_channels     = 1;
    wh.bit_depth        = 16;
    wh.sample_alignment = 2;
    wh.sample_rate      = WAVE_SAMPLE_RATE;
    wh.byte_rate        = wh.sample_rate * wh.sample_alignment;
}
|
|
|
|
|
static void write_wave_hdr(int fd, size_t size) { |
|
|
struct wave_hdr wh; |
|
|
set_wave_hdr(wh, size); |
|
|
write(fd, &wh, sizeof(struct wave_hdr)); |
|
|
} |
|
|
|
|
|
/*
 * Map the whole file behind `fd` into memory as a private, writable mapping.
 *
 * fd   - open file descriptor to map.
 * ptr  - receives the start of the mapping on success, NULL on failure.
 * size - receives the mapping length in bytes on success, 0 on failure.
 *
 * Returns 0 on success and -1 on failure (the failing call is reported via
 * perror). The caller releases the mapping with munmap(*ptr, *size).
 */
static int map_file(int fd, uint8_t **ptr, size_t *size)
{
    struct stat sb;
    void *addr;
    size_t length;

    /* Leave the outputs in a well-defined state on every failure path. */
    *ptr = NULL;
    *size = 0;

    if (fstat(fd, &sb) != 0) {
        perror("fstat");
        return -1;
    }
    length = sb.st_size;

    addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (addr == MAP_FAILED) {
        perror("mmap");
        return -1;
    }

    *ptr = (uint8_t *)addr;
    *size = length;
    return 0;
}
|
|
|
|
|
static int read_packet(void *opaque, u8 *buf, int buf_size) |
|
|
{ |
|
|
struct audio_buffer *audio_buf = (audio_buffer*)opaque; |
|
|
|
|
|
buf_size = FFMIN(buf_size, audio_buf->size); |
|
|
|
|
|
|
|
|
memcpy(buf, audio_buf->ptr, buf_size); |
|
|
audio_buf->ptr += buf_size; |
|
|
audio_buf->size -= buf_size; |
|
|
|
|
|
return buf_size; |
|
|
} |
|
|
|
|
|
static void convert_frame(struct SwrContext *swr, AVCodecContext *codec, |
|
|
AVFrame *frame, s16 **data, int *size, bool flush) |
|
|
{ |
|
|
int nr_samples; |
|
|
s64 delay; |
|
|
u8 *buffer; |
|
|
|
|
|
delay = swr_get_delay(swr, codec->sample_rate); |
|
|
nr_samples = av_rescale_rnd(delay + frame->nb_samples, |
|
|
WAVE_SAMPLE_RATE, codec->sample_rate, |
|
|
AV_ROUND_UP); |
|
|
av_samples_alloc(&buffer, NULL, 1, nr_samples, AV_SAMPLE_FMT_S16, 0); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
nr_samples = swr_convert(swr, &buffer, nr_samples, |
|
|
!flush ? (const u8 **)frame->data : NULL, |
|
|
!flush ? frame->nb_samples : 0); |
|
|
|
|
|
*data = (s16*)realloc(*data, (*size + nr_samples) * sizeof(s16)); |
|
|
memcpy(*data + *size, buffer, nr_samples * sizeof(s16)); |
|
|
*size += nr_samples; |
|
|
av_freep(&buffer); |
|
|
} |
|
|
|
|
|
static bool is_audio_stream(const AVStream *stream) |
|
|
{ |
|
|
if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) |
|
|
return true; |
|
|
|
|
|
return false; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size) |
|
|
{ |
|
|
LOG("decode_audio: input size: %d\n", audio_buf->size); |
|
|
AVFormatContext *fmt_ctx; |
|
|
AVIOContext *avio_ctx; |
|
|
AVStream *stream; |
|
|
AVCodecContext *codec; |
|
|
AVPacket *packet; |
|
|
AVFrame *frame; |
|
|
struct SwrContext *swr; |
|
|
u8 *avio_ctx_buffer; |
|
|
unsigned int i; |
|
|
int stream_index = -1; |
|
|
int err; |
|
|
const size_t errbuffsize = 1024; |
|
|
char errbuff[errbuffsize]; |
|
|
|
|
|
fmt_ctx = avformat_alloc_context(); |
|
|
avio_ctx_buffer = (u8*)av_malloc(AVIO_CTX_BUF_SZ); |
|
|
LOG("Creating an avio context: AVIO_CTX_BUF_SZ=%d\n", AVIO_CTX_BUF_SZ); |
|
|
avio_ctx = avio_alloc_context(avio_ctx_buffer, AVIO_CTX_BUF_SZ, 0, audio_buf, &read_packet, NULL, NULL); |
|
|
fmt_ctx->pb = avio_ctx; |
|
|
|
|
|
|
|
|
err = avformat_open_input(&fmt_ctx, NULL, NULL, NULL); |
|
|
if (err) { |
|
|
LOG("Could not read audio buffer: %d: %s\n", err, av_make_error_string(errbuff, errbuffsize, err)); |
|
|
return err; |
|
|
} |
|
|
|
|
|
err = avformat_find_stream_info(fmt_ctx, NULL); |
|
|
if (err < 0) { |
|
|
LOG("Could not retrieve stream info from audio buffer: %d\n", err); |
|
|
return err; |
|
|
} |
|
|
|
|
|
for (i = 0; i < fmt_ctx->nb_streams; i++) { |
|
|
if (is_audio_stream(fmt_ctx->streams[i])) { |
|
|
stream_index = i; |
|
|
break; |
|
|
} |
|
|
} |
|
|
|
|
|
if (stream_index == -1) { |
|
|
LOG("Could not retrieve audio stream from buffer\n"); |
|
|
return -1; |
|
|
} |
|
|
|
|
|
stream = fmt_ctx->streams[stream_index]; |
|
|
codec = avcodec_alloc_context3( |
|
|
avcodec_find_decoder(stream->codecpar->codec_id)); |
|
|
avcodec_parameters_to_context(codec, stream->codecpar); |
|
|
err = avcodec_open2(codec, avcodec_find_decoder(codec->codec_id), |
|
|
NULL); |
|
|
if (err) { |
|
|
LOG("Failed to open decoder for stream #%d in audio buffer\n", stream_index); |
|
|
return err; |
|
|
} |
|
|
|
|
|
|
|
|
swr = swr_alloc(); |
|
|
|
|
|
#if LIBAVCODEC_VERSION_MAJOR > 60 |
|
|
AVChannelLayout in_ch_layout = codec->ch_layout; |
|
|
AVChannelLayout out_ch_layout = AV_CHANNEL_LAYOUT_MONO; |
|
|
|
|
|
|
|
|
av_opt_set_chlayout(swr, "in_chlayout", &in_ch_layout, 0); |
|
|
av_opt_set_int(swr, "in_sample_rate", codec->sample_rate, 0); |
|
|
av_opt_set_sample_fmt(swr, "in_sample_fmt", codec->sample_fmt, 0); |
|
|
|
|
|
|
|
|
av_opt_set_chlayout(swr, "out_chlayout", &out_ch_layout, 0); |
|
|
av_opt_set_int(swr, "out_sample_rate", WAVE_SAMPLE_RATE, 0); |
|
|
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0); |
|
|
#else |
|
|
av_opt_set_int(swr, "in_channel_count", codec->channels, 0); |
|
|
av_opt_set_int(swr, "out_channel_count", 1, 0); |
|
|
av_opt_set_int(swr, "in_channel_layout", codec->channel_layout, 0); |
|
|
av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_MONO, 0); |
|
|
av_opt_set_int(swr, "in_sample_rate", codec->sample_rate, 0); |
|
|
av_opt_set_int(swr, "out_sample_rate", WAVE_SAMPLE_RATE, 0); |
|
|
av_opt_set_sample_fmt(swr, "in_sample_fmt", codec->sample_fmt, 0); |
|
|
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0); |
|
|
#endif |
|
|
|
|
|
swr_init(swr); |
|
|
if (!swr_is_initialized(swr)) { |
|
|
LOG("Resampler has not been properly initialized\n"); |
|
|
return -1; |
|
|
} |
|
|
|
|
|
packet=av_packet_alloc(); |
|
|
if (!packet) { |
|
|
LOG("Error allocating the packet\n"); |
|
|
return -1; |
|
|
} |
|
|
frame = av_frame_alloc(); |
|
|
if (!frame) { |
|
|
LOG("Error allocating the frame\n"); |
|
|
return -1; |
|
|
} |
|
|
|
|
|
|
|
|
*data = NULL; |
|
|
*size = 0; |
|
|
while (av_read_frame(fmt_ctx, packet) >= 0) { |
|
|
avcodec_send_packet(codec, packet); |
|
|
|
|
|
err = avcodec_receive_frame(codec, frame); |
|
|
if (err == AVERROR(EAGAIN)) |
|
|
continue; |
|
|
|
|
|
convert_frame(swr, codec, frame, data, size, false); |
|
|
} |
|
|
|
|
|
convert_frame(swr, codec, frame, data, size, true); |
|
|
|
|
|
av_packet_free(&packet); |
|
|
av_frame_free(&frame); |
|
|
swr_free(&swr); |
|
|
|
|
|
avcodec_free_context(&codec); |
|
|
avformat_close_input(&fmt_ctx); |
|
|
avformat_free_context(fmt_ctx); |
|
|
|
|
|
if (avio_ctx) { |
|
|
av_freep(&avio_ctx->buffer); |
|
|
av_freep(&avio_ctx); |
|
|
} |
|
|
|
|
|
return 0; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int ffmpeg_decode_audio(const std::string &ifname, std::vector<uint8_t>& owav_data) { |
|
|
LOG("ffmpeg_decode_audio: %s\n", ifname.c_str()); |
|
|
int ifd = open(ifname.c_str(), O_RDONLY); |
|
|
if (ifd == -1) { |
|
|
fprintf(stderr, "Couldn't open input file %s\n", ifname.c_str()); |
|
|
return -1; |
|
|
} |
|
|
u8 *ibuf = NULL; |
|
|
size_t ibuf_size; |
|
|
int err = map_file(ifd, &ibuf, &ibuf_size); |
|
|
if (err) { |
|
|
LOG("Couldn't map input file %s\n", ifname.c_str()); |
|
|
return err; |
|
|
} |
|
|
LOG("Mapped input file: %s size: %d\n", ibuf, (int) ibuf_size); |
|
|
struct audio_buffer inaudio_buf; |
|
|
inaudio_buf.ptr = ibuf; |
|
|
inaudio_buf.size = ibuf_size; |
|
|
|
|
|
s16 *odata=NULL; |
|
|
int osize=0; |
|
|
|
|
|
err = decode_audio(&inaudio_buf, &odata, &osize); |
|
|
LOG("decode_audio returned %d \n", err); |
|
|
if (err != 0) { |
|
|
LOG("decode_audio failed\n"); |
|
|
return err; |
|
|
} |
|
|
LOG("decode_audio output size: %d\n", osize); |
|
|
|
|
|
wave_hdr wh; |
|
|
const size_t outdatasize = osize * sizeof(s16); |
|
|
set_wave_hdr(wh, outdatasize); |
|
|
owav_data.resize(sizeof(wave_hdr) + outdatasize); |
|
|
|
|
|
memcpy(owav_data.data(), &wh, sizeof(wave_hdr)); |
|
|
|
|
|
memcpy(owav_data.data() + sizeof(wave_hdr), odata, osize* sizeof(s16)); |
|
|
|
|
|
return 0; |
|
|
} |
|
|
|