File size: 5,082 Bytes
2f6b10b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
library(tidyr)
library(dplyr)
library(readr)
library(stringr)
# Load every alignment evaluation CSV found under
#   <root_dir>/<evaluation>/<corpus>/alignment_reference_evaluation.csv
# and stack them into one data frame called `data`, tagging each row with the
# evaluation and corpus directory names it came from.
root_dir <- "D:/Data/experiments/alignment_benchmarking/alignments"
evals <- list.dirs(root_dir, recursive = FALSE, full.names = FALSE)

# Read a single evaluation CSV and add the per-row bookkeeping columns.
#   path       - full path to the CSV file
#   evaluation - name of the evaluation directory the file lives under
#   corpus     - name of the corpus directory the file lives under
# Returns the parsed table with `alignment_score` coerced to numeric and the
# extra columns `utterance`, `evaluation` and `corpus`.
read_evaluation_file <- function(path, evaluation, corpus) {
  d <- read_csv(path, show_col_types = FALSE, lazy = FALSE)
  d$alignment_score <- as.numeric(d$alignment_score)
  # Utterance id: file name plus begin/end with "." replaced by "-".
  d$utterance <- paste(
    d$file,
    str_replace_all(as.character(d$begin), "\\.", "-"),
    str_replace_all(as.character(d$end), "\\.", "-"),
    sep = "-"
  )
  d$evaluation <- evaluation
  d$corpus <- corpus
  d
}

# Collect the per-file tables in a list and bind them once at the end;
# binding inside the loop would re-copy all accumulated rows for every file.
frames <- list()
for (e in evals) {
  corpora <- list.dirs(
    file.path(root_dir, e),
    recursive = FALSE,
    full.names = FALSE
  )
  for (corpus_name in corpora) {
    print(e)
    print(corpus_name)
    path <- file.path(
      root_dir, e, corpus_name, "alignment_reference_evaluation.csv"
    )
    if (!file.exists(path)) {
      next
    }
    print(path)
    frames[[length(frames) + 1L]] <- read_evaluation_file(path, e, corpus_name)
  }
}

# Leading with an empty data.frame keeps the result a plain data.frame (not a
# tibble), and yields an empty data.frame when no CSV was found.
data <- bind_rows(data.frame(), frames)
# Derive the condition factors (`version`, `adapted`, `finetuned`,
# `phone_set`) from the evaluation directory name.
#
# Assignments are done on plain vectors with logical masks, so a tag that
# matches no rows is a no-op instead of an error, and tags are matched as
# literal substrings so the "." in a version number is not a regex wildcard.
data$evaluation <- factor(data$evaluation)
evaluation_name <- as.character(data$evaluation)

# TRUE for each row whose evaluation name contains `tag` literally.
has_tag <- function(tag) {
  str_detect(evaluation_name, fixed(tag))
}

# Tag -> version label. Applied in order, so later entries override earlier
# ones: "_2.0a" must follow "_2.0", and "trained_3.0" must follow "_3.0".
version_tags <- c(
  "_1.0" = "1.0",
  "_2.0" = "2.0",
  "_2.0a" = "2.0a",
  "_2.1" = "2.1",
  "_2.2" = "2.2",
  "_3.0" = "3.0",
  "_3.1" = "3.1",
  # "trained_2.2" = "trained_2.2",
  "trained_3.0" = "trained_3.0"
)
version <- rep("train", length(evaluation_name))
for (tag in names(version_tags)) {
  version[has_tag(tag)] <- version_tags[[tag]]
}
data$version <- factor(version)

data$adapted <- factor(
  ifelse(has_tag("_adapt"), "Adapted", "Not adapted")
)
data$finetuned <- factor(
  ifelse(has_tag("_finetune"), "Finetuned", "Not finetuned")
)

# Phone set defaults to "mfa"; "gp" is applied last and wins over "arpa".
phone_set <- rep("mfa", length(evaluation_name))
phone_set[has_tag("arpa")] <- "arpa"
phone_set[has_tag("gp")] <- "gp"
data$phone_set <- factor(phone_set)
# Keep only rows that
#   * have a non-missing alignment score,
#   * have more than one word,
#   * are not the two-word / two-reference-phone case, and
#   * belong to version "3.0" or "3.1".
# A row is kept only when every condition is TRUE; an NA in any condition
# drops the row.
keep_row <- with(
  data,
  !is.na(data$alignment_score) &
    word_count > 1 &
    !(word_count == 2 & reference_phone_count == 2) &
    version %in% c("3.0", "3.1")
)
data <- data[keep_row & !is.na(keep_row), , drop = FALSE]
# Summarise `alignment_score` per evaluation / adaptation / finetuning /
# corpus / phone set and plot the mean with +/- `ci` error bars, one panel per
# phone_set ~ corpus * adapted combination.
# NOTE(review): `summarySE` is not defined in this file; presumably it comes
# from the Rmisc package and returns `mean` and `ci` columns - confirm.
# Values are scaled by 1000 for the "(ms)" axis label, which assumes the
# scores are in seconds - TODO confirm.
plotData <- summarySE(
  data = data,
  measurevar = "alignment_score",
  groupvars = c("evaluation", "adapted", "finetuned", "corpus", "phone_set")
)
ggplot(plotData, aes(x = evaluation, y = mean * 1000)) +
  geom_point(size = 5, color = "#FB5607") +
  geom_errorbar(
    aes(ymin = (mean - ci) * 1000, ymax = (mean + ci) * 1000),
    size = 2,
    width = 0.5,
    color = "#FB5607"
  ) +
  labs(
    y = "Phone boundary error (ms)",
    x = "Alignment condition",
    title = "Phone boundary errors"
  ) +
  theme_memcauliffe() +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  facet_trelliscope(phone_set ~ corpus * adapted, ncol = 2, scales = "free_x")
# Summarise `phone_error_rate` per version / corpus / phone set and plot the
# mean with +/- `ci` error bars as a percentage, one panel per
# phone_set ~ corpus combination.
# NOTE(review): `summarySE` is not defined in this file; presumably it comes
# from the Rmisc package and returns `mean` and `ci` columns - confirm.
plotData <- summarySE(
  data = data,
  measurevar = "phone_error_rate",
  groupvars = c("version", "corpus", "phone_set")
)
ggplot(plotData, aes(x = version, y = mean * 100)) +
  geom_point(size = 5, color = "#FB5607") +
  geom_errorbar(
    aes(ymin = (mean - ci) * 100, ymax = (mean + ci) * 100),
    size = 2,
    width = 0.5,
    color = "#FB5607"
  ) +
  labs(
    y = "Phone error rate %",
    x = "Alignment condition",
    title = "Phone error rate"
  ) +
  theme_memcauliffe() +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  facet_trelliscope(phone_set ~ corpus, ncol = 2, scales = "free_x")
# LabPhon figure: mean phone boundary error per corpus for the 1.0 conditions
# versus mfa_3.0, saved to docs/source/_static/benchmarks/phone_alignment.png.
# NOTE(review): this selection asks for the 1.0 conditions; if `data` has
# already been restricted to versions 3.0/3.1 earlier in the script, only
# mfa_3.0 rows can remain here - confirm the intended run order.
labphon_data <- subset(
  data,
  corpus %in% c("timit", "buckeye", "csj", "seoul") &
    evaluation %in% c("arpa_1.0", "gp_1.0", "mfa_3.0")
)

# Fold "gp_1.0" into "arpa_1.0" so both 1.0 conditions share one x position.
# Recoding through a character vector works when there are no gp_1.0 rows and
# when "arpa_1.0" is not yet a factor level; re-creating the factor afterwards
# drops levels that are no longer present.
condition <- as.character(labphon_data$evaluation)
condition[condition == "gp_1.0"] <- "arpa_1.0"
labphon_data$evaluation <- factor(condition)

plotData <- summarySE(
  data = labphon_data,
  measurevar = "alignment_score",
  groupvars = c("evaluation", "corpus")
)
# Labels are named by the value they describe, so they stay correct even when
# a condition or corpus is absent from the data.
ggplot(
  plotData,
  aes(x = evaluation, y = mean * 1000, color = corpus, group = corpus)
) +
  geom_path() +
  geom_point(size = 2.5) +
  ylab("Phone boundary error (ms)") +
  xlab("MFA version") +
  ggtitle("Phone boundary errors") +
  theme_memcauliffe() +
  scale_x_discrete(labels = c("arpa_1.0" = "1.0", "mfa_3.0" = "3.0")) +
  scale_color_manual(
    values = cbbPalette,
    labels = c(
      buckeye = "Buckeye",
      csj = "CSJ",
      seoul = "Seoul",
      timit = "TIMIT"
    ),
    name = "Corpus"
  )
ggsave(
  "docs/source/_static/benchmarks/phone_alignment.png",
  width = 1000,
  height = 800,
  units = "px",
  dpi = 200
)
# UW colloquium figure: mean phone boundary error of the mfa_3.0 condition for
# each of four corpora, saved to
# docs/source/_static/benchmarks/uw_phone_alignment.png.

# Corpus id -> axis label; the order here is the order on the x axis.
uw_corpus_labels <- c(
  timit = "English-TIMIT",
  buckeye = "English-Buckeye",
  csj = "Japanese-CSJ",
  seoul = "Korean-Seoul"
)
uw_corpus_ids <- names(uw_corpus_labels)

uw_colloquium_data <- subset(
  data,
  corpus %in% uw_corpus_ids & evaluation %in% c("mfa_3.0")
)
# The column name is spelled out in full: the previous `$corpu` only worked
# through data.frame partial matching and would return NULL on a tibble.
uw_colloquium_data$corpus <- factor(
  uw_colloquium_data$corpus,
  levels = uw_corpus_ids
)

plotData <- summarySE(
  data = uw_colloquium_data,
  measurevar = "alignment_score",
  groupvars = c("corpus")
)
# Named labels keep each corpus paired with its own label even if one of the
# corpora has no rows.
ggplot(plotData, aes(x = corpus, y = mean * 1000)) +
  geom_point(size = 6, color = "#FB5607") +
  ylab("Phone boundary error (ms)") +
  xlab("Corpus") +
  ggtitle("Phone boundary errors") +
  theme_memcauliffe() +
  scale_x_discrete(labels = uw_corpus_labels)
ggsave(
  "docs/source/_static/benchmarks/uw_phone_alignment.png",
  width = 1500,
  height = 800,
  units = "px",
  dpi = 150
)
# Ad hoc inspection helpers.
# `t` holds the Seoul rows of version 3.0 (note: the name shadows base::t as a
# variable). Rows where the condition is NA are dropped.
seoul_v3 <- with(data, corpus == "seoul" & version == "3.0")
t <- data[seoul_v3 & !is.na(seoul_v3), , drop = FALSE]

# Number of Buckeye rows with the "mfa" phone set, per version.
data %>%
  filter(corpus == "buckeye", phone_set == "mfa") %>%
  group_by(version) %>%
  summarise(n())
|