Upload folder using huggingface_hub

41f6dd8 verified over 1 year ago

4.87 kB

	# .PHONY: $1
	# $1: $1.binphr.idx
	# $1.txt.gz: | L1text = $4
	# $1.txt.gz: | L2text = $5
	# $1.txt.gz: | symal = $6
	# ${moses.ini}: $1
	# PTABLES += 1;$2;$3;5;$1
	# endef


	# ${target}.tmp/fwd/scored.gz: | ${target}/phrase-extraction.DONE
	# | ${L1File} ${L2File} ${symal}


	# # convert phrase table from text file to binary format
	# %.binphr.idx: | %.txt.gz ${MOSES_BIN}/processPhraseTable
	# $(lock)
	# zcat -f $*.txt.gz | ${MOSES_BIN}/processPhraseTable \
	# -ttable ${L1factors} ${L2factors} - -nscores 5 -out ${@D}/_${@F} \
	# && mv ${@D}/_${@F} $@
	# $(unlock)


	# # directory definitions
	# mo_mdl = model
	# mo_tmp = model/tmp
	# wrdaln = ${fstaln}/out
	# # wrdaln should be set elsewhere!

	# # milestone files created during phrase table construction
	# ptable_bin = ${mo_mdl}/ptable.${L1}-${L2}
	# ptable = ${mo_mdl}/ptable.${L1}-${L2}.txt.gz
	# lex1given2 = ${mo_mdl}/${L1}-given-${L2}.lex.gz
	# lex2given1 = ${mo_mdl}/${L2}-given-${L1}.lex.gz
	# mosesinifile = ${mo_mdl}/moses.ini.0

	# .PHONY: lex ptable
	# lex: ${lex1given2} ${lex2given1}
	# ptable: ${ptable_bin}

	# # steps taken in this module

	# # -------------------------------------------------------------------------------
	# # --- STEP 1a: extract raw phrases from word-aligned corpus ---------------------
	# # -------------------------------------------------------------------------------
	# # Note: the script ${moses.extract-phrases} takes care of initial sorting
	# ${mo_tmp}/phrase-extraction.DONE: | ${moses.extract-phrases}
	# ${mo_tmp}/phrase-extraction.DONE: | ${moses.extract}
	# ${mo_tmp}/phrase-extraction.DONE: | ${wrdaln}/${L1}.txt.gz
	# ${mo_tmp}/phrase-extraction.DONE: | ${wrdaln}/${L2}.txt.gz
	# ${mo_tmp}/phrase-extraction.DONE: | ${wrdaln}/${L1}-${L2}.symal.gz
	# ${mo_tmp}/phrase-extraction.DONE:
	# $(lock)
	# ${moses.extract-phrases} \
	# ${moses.extract} \
	# ${wrdaln}/${L1}.txt.gz \
	# ${wrdaln}/${L2}.txt.gz \
	# ${wrdaln}/${L1}-${L2}.symal.gz \
	# ${mo_tmp} ${max_phrase_length} \
	# ${dmodel.type}-${dmodel.orientation} \
	# && touch $@
	# $(unlock)

	# # -------------------------------------------------------------------------------
	# # --- STEP 1b: extract word translation lexica from word-aligned corpus ---------
	# # --- (for lexical phrase scoring) ---------
	# # -------------------------------------------------------------------------------
	# $(lex2given1): $(lex1given2)
	# $(lex1given2): | ${wrdaln}/${L1}.txt.gz
	# $(lex1given2): | ${wrdaln}/${L2}.txt.gz
	# $(lex1given2): | ${wrdaln}/${L1}-${L2}.symal.gz
	# $(lock)
	# $(moses.make-lex) \
	# ${wrdaln}/${L1}.txt.gz \
	# ${wrdaln}/${L2}.txt.gz \
	# ${wrdaln}/${L1}-${L2}.symal.gz \
	# $(lex1given2) \
	# $(lex2given1)
	# $(unlock)

	# # -------------------------------------------------------------------------------
	# # --- STEP 2: score extracted phrase pairs --------------------------------------
	# # -------------------------------------------------------------------------------
	# ptfwdhalf = ${mo_tmp}/fwd/phrases.fwd.scored.gz
	# ptbwdhalf = ${mo_tmp}/bwd/phrase-scoring.DONE

	# # -------------------------------------------------------------------------------
	# # --- STEP 2a: score phrases in the 'forward' direction -------------------------
	# # -------------------------------------------------------------------------------
	# $(ptfwdhalf): | ${mo_tmp}/phrase-extraction.DONE
	# $(ptfwdhalf): | ${lex1given2}
	# $(lock)
	# $(merge-sorted) ${mo_tmp}/fwd/part.*.gz \
	# | ${moses.score-phrases} ${MOSES_BIN}/score - ${lex1given2} ${@:.scored.gz=} \
	# $(ptable.smoothing) && mv $@_ $@
	# $(unlock)

	# # -------------------------------------------------------------------------------
	# # --- STEP 2b: score phrases in the 'backward' direction -------------------------
	# # -------------------------------------------------------------------------------
	# # Note: ${moses.score-phrases} re-sorts the scored backward phrases
	# $(ptbwdhalf): | ${mo_tmp}/phrase-extraction.DONE
	# $(ptbwdhalf): | ${lex2given1}
	# $(lock)
	# $(merge-sorted) ${mo_tmp}/bwd/part.*.gz \
	# | ${moses.score-phrases} ${MOSES_BIN}/score - ${lex2given1} ${@D}/scored \
	# "$(ptable.smoothing)" --Inverse && touch $@
	# $(unlock)

	# # -------------------------------------------------------------------------------
	# # --- STEP 3: put the two phrase table halves together --------------------------
	# # -------------------------------------------------------------------------------
	# # ptfwdhalf is a single .gz file; ptbwdhalf is a .DONE marker for a collection of .gz files (bwd/scored.*.gz)
	# $(ptable): | ${MOSES_BIN}/consolidate
	# $(ptable): | $(ptfwdhalf) $(ptbwdhalf)
	# $(lock)
	# ${MOSES_BIN}/consolidate \
	# <(zcat ${ptfwdhalf}) \
	# <(${merge-sorted} ${mo_tmp}/bwd/scored.*.gz) /dev/stdout \
	# $(if $(ptable.smoothing), \
	# $(ptable.smoothing) $(ptfwdhalf:.sorted.gz=.coc)) \
	# | gzip > $@_ && mv $@_ $@
	# $(unlock)