|
|
library(tidyr) |
|
|
library(dplyr) |
|
|
library(readr) |
|
|
library(stringr) |
|
|
# ---- Load evaluation results ----
# Reads every `alignment_reference_evaluation.csv` found at
#   <root_dir>/<evaluation>/<corpus>/alignment_reference_evaluation.csv
# and stacks them into a single data frame `data`, tagging each row with the
# evaluation and corpus directory names it came from.
root_dir <- "D:/Data/experiments/alignment_benchmarking/alignments"

# Immediate sub-directories of root_dir, one per evaluation condition.
evals <- list.dirs(root_dir, recursive = FALSE, full.names = FALSE)

# Per-file tables are collected here and bound once after the loop; calling
# bind_rows() on the growing result inside the loop re-copies every
# previously loaded row on each iteration.
tables <- list()

for (e in evals) {
  corpora <- list.dirs(
    file.path(root_dir, e),
    recursive = FALSE,
    full.names = FALSE
  )

  for (corpus_name in corpora) {
    # Progress output.
    print(e)
    print(corpus_name)

    path <- file.path(
      root_dir, e, corpus_name, "alignment_reference_evaluation.csv"
    )
    # Corpus directories without an evaluation file are skipped silently.
    if (!file.exists(path)) {
      next
    }
    print(path)

    d <- read_csv(path, show_col_types = FALSE, lazy = FALSE)
    d$alignment_score <- as.numeric(d$alignment_score)

    # Utterance identifier: file name plus begin/end values, with the decimal
    # point in begin/end replaced by "-".
    d$utterance <- paste(
      d$file,
      str_replace_all(as.character(d$begin), '\\.', '-'),
      str_replace_all(as.character(d$end), '\\.', '-'),
      sep = "-"
    )
    d$evaluation <- e
    d$corpus <- corpus_name

    tables[[length(tables) + 1L]] <- d
  }
}

# as.data.frame() keeps `data` a plain data.frame (it was previously seeded
# with data.frame()), so later base-R style indexing behaves as before.
data <- as.data.frame(bind_rows(tables))
|
|
|
|
|
# ---- Derive condition factors from the evaluation name ----
# Adds the factor columns `version`, `adapted`, `finetuned` and `phone_set`,
# each decided by which substrings occur in the evaluation directory name.
data$evaluation <- factor(data$evaluation)

# Work on the character form of the names. Tags are matched literally with
# fixed(): as regular expressions the "." in e.g. "_1.0" would match any
# character.
eval_name <- as.character(data$evaluation)
has_tag <- function(tag) {
  str_detect(eval_name, fixed(tag))
}

# Assignments below use `data$col[mask] <- value`. The earlier form
# `data[mask, ]$col <- value` raises an error whenever `mask` selects no rows,
# which happens as soon as one of the tags is absent from the loaded data.

# Version: defaults to "train". Tags are applied in order and later matches
# overwrite earlier ones, so "_2.0a" wins over "_2.0" and "trained_3.0" wins
# over "_3.0".
version_tags <- c(
  "_1.0" = "1.0",
  "_2.0" = "2.0",
  "_2.0a" = "2.0a",
  "_2.1" = "2.1",
  "_2.2" = "2.2",
  "_3.0" = "3.0",
  "_3.1" = "3.1",
  "trained_3.0" = "trained_3.0"
)
data$version <- rep("train", nrow(data))
for (tag in names(version_tags)) {
  data$version[has_tag(tag)] <- version_tags[[tag]]
}
data$version <- factor(data$version)

# Adaptation flag: names containing "_adapt".
data$adapted <- rep("Not adapted", nrow(data))
data$adapted[has_tag("_adapt")] <- "Adapted"
data$adapted <- factor(data$adapted)

# Fine-tuning flag: names containing "_finetune".
data$finetuned <- rep("Not finetuned", nrow(data))
data$finetuned[has_tag("_finetune")] <- "Finetuned"
data$finetuned <- factor(data$finetuned)

# Phone set: "mfa" unless the name contains "arpa" or "gp" ("gp" is applied
# last and therefore wins if both occur).
data$phone_set <- rep("mfa", nrow(data))
data$phone_set[has_tag("arpa")] <- "arpa"
data$phone_set[has_tag("gp")] <- "gp"
data$phone_set <- factor(data$phone_set)
|
|
|
|
|
# ---- Row filters ----
# Keep a row only when all of the following hold:
#   * alignment_score is not missing,
#   * word_count is greater than 1,
#   * it is not the case that word_count and reference_phone_count are both 2.
data <- subset(
  data,
  !is.na(alignment_score) &
    word_count > 1 &
    (word_count != 2 | reference_phone_count != 2)
)
|
|
|
|
|
|
|
|
# ---- Version 3.0 / 3.1 plots ----
# These two plots only use versions "3.0" and "3.1". The restriction is stored
# in its own data frame instead of overwriting `data`: later sections select
# evaluations such as 'arpa_1.0' / 'gp_1.0' and count rows per version, which
# needs the other versions to still be present in `data`.
recent_data <- subset(data, version %in% c("3.0", "3.1"))

# Plot 1: alignment_score per evaluation condition.
# The plot reads the `mean` and `ci` columns of the summarySE() result.
# NOTE(review): summarySE() is not defined or loaded in this file (presumably
# Rmisc::summarySE or a sourced helper) - confirm.
plotData <- summarySE(
  data = recent_data,
  measurevar = "alignment_score",
  groupvars = c("evaluation", "adapted", "finetuned", "corpus", "phone_set")
)

# Values are multiplied by 1000 for the "(ms)" axis
# (assumes alignment_score is in seconds - TODO confirm).
ggplot(aes(x = evaluation, y = mean * 1000), data = plotData) +
  geom_point(size = 5, color = "#FB5607") +
  geom_errorbar(
    aes(ymin = (mean - ci) * 1000, ymax = (mean + ci) * 1000),
    size = 2, width = 0.5, color = "#FB5607"
  ) +
  ylab("Phone boundary error (ms)") +
  xlab("Alignment condition") +
  ggtitle("Phone boundary errors") +
  theme_memcauliffe() +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  facet_trelliscope(phone_set ~ corpus * adapted, ncol = 2, scales = "free_x")

# Plot 2: phone_error_rate per version, shown as a percentage (x 100).
plotData <- summarySE(
  data = recent_data,
  measurevar = "phone_error_rate",
  groupvars = c("version", "corpus", "phone_set")
)

ggplot(aes(x = version, y = mean * 100), data = plotData) +
  geom_point(size = 5, color = "#FB5607") +
  geom_errorbar(
    aes(ymin = (mean - ci) * 100, ymax = (mean + ci) * 100),
    size = 2, width = 0.5, color = "#FB5607"
  ) +
  ylab("Phone error rate %") +
  xlab("Alignment condition") +
  ggtitle("Phone error rate") +
  theme_memcauliffe() +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  facet_trelliscope(phone_set ~ corpus, ncol = 2, scales = "free_x")
|
|
|
|
|
|
|
|
# ---- LabPhon figure: 1.0 vs 3.0 phone boundary error ----
# Uses four corpora and three evaluations; 'gp_1.0' is relabelled as
# 'arpa_1.0' so that both are plotted as one x-axis condition.
labphon_data <- subset(
  data,
  corpus %in% c('timit', 'buckeye', 'csj', 'seoul') &
    evaluation %in% c('arpa_1.0', 'gp_1.0', 'mfa_3.0')
)

# Relabel on a character vector and re-factor afterwards. The earlier form
# `labphon_data[mask, ]$evaluation <- 'arpa_1.0'` raises an error when no
# 'gp_1.0' rows exist, and, because it writes into a factor, produces NA when
# 'arpa_1.0' is not already one of the factor's levels.
eval_label <- as.character(labphon_data$evaluation)
eval_label[eval_label == 'gp_1.0'] <- 'arpa_1.0'
# Explicit level order so the positional x-axis labels c("1.0", "3.0") below
# line up with 'arpa_1.0' and 'mfa_3.0' respectively.
labphon_data$evaluation <- factor(
  eval_label,
  levels = c('arpa_1.0', 'mfa_3.0')
)

plotData <- summarySE(
  data = labphon_data,
  measurevar = 'alignment_score',
  groupvars = c('evaluation', 'corpus')
)

# One line per corpus connecting its two evaluation means; values x 1000 for
# the "(ms)" axis (assumes alignment_score is in seconds - TODO confirm).
# NOTE(review): cbbPalette and theme_memcauliffe() are not defined in this
# file - confirm they are provided by the session or a sourced helper.
ggplot(
  aes(x = evaluation, y = mean * 1000, color = corpus, group = corpus),
  data = plotData
) +
  geom_path() +
  geom_point(size = 2.5) +
  ylab('Phone boundary error (ms)') +
  xlab('MFA version') +
  ggtitle('Phone boundary errors') +
  theme_memcauliffe() +
  scale_x_discrete(labels = c("1.0", "3.0")) +
  scale_color_manual(
    values = cbbPalette,
    labels = c('Buckeye', 'CSJ', "Seoul", "TIMIT"),
    name = 'Corpus'
  )

# No plot is passed, so ggsave() writes ggplot2's most recent plot.
ggsave(
  "docs/source/_static/benchmarks/phone_alignment.png",
  width = 1000, height = 800, units = "px", dpi = 200
)
|
|
|
|
|
|
|
|
# ---- UW colloquium figure: phone boundary error per corpus (mfa_3.0) ----
uw_colloquium_data <- subset(
  data,
  corpus %in% c('timit', 'buckeye', 'csj', 'seoul') &
    evaluation %in% c('mfa_3.0')
)

# Fix the corpus order so it matches the positional x-axis labels below.
# The column is now read by its full name `corpus`; the previous `$corpu`
# only resolved through data.frame partial matching and returns NULL on a
# tibble.
uw_colloquium_data$corpus <- factor(
  uw_colloquium_data$corpus,
  levels = c('timit', 'buckeye', 'csj', 'seoul')
)

plotData <- summarySE(
  data = uw_colloquium_data,
  measurevar = 'alignment_score',
  groupvars = c('corpus')
)

# Values x 1000 for the "(ms)" axis
# (assumes alignment_score is in seconds - TODO confirm).
ggplot(aes(x = corpus, y = mean * 1000), data = plotData) +
  geom_point(size = 6, color = '#FB5607') +
  ylab('Phone boundary error (ms)') +
  xlab('Corpus') +
  ggtitle('Phone boundary errors') +
  theme_memcauliffe() +
  scale_x_discrete(
    labels = c("English-TIMIT", 'English-Buckeye', 'Japanese-CSJ', "Korean-Seoul")
  )

# No plot is passed, so ggsave() writes ggplot2's most recent plot.
ggsave(
  "docs/source/_static/benchmarks/uw_phone_alignment.png",
  width = 1500, height = 800, units = "px", dpi = 150
)
|
|
|
|
|
|
|
|
|
|
|
# ---- Ad-hoc inspection ----
# Rows of `data` whose corpus is 'seoul' and whose version is '3.0'.
# which() drops NA comparisons, matching subset()'s treatment of NA.
t <- data[which(data$corpus == 'seoul' & data$version == '3.0'), , drop = FALSE]

# Row count per version for the 'buckeye' corpus with the 'mfa' phone set.
data %>%
  filter(corpus == 'buckeye', phone_set == 'mfa') %>%
  group_by(version) %>%
  summarise(n())
|
|
|