Spaces:
Building
Building
| english: | |
| unigrams: | |
| COCA_magazine_frequency_token: | |
| display_name: COCA Magazine Frequency (Token) | |
| description: Frequency and range data from COCA magazine subcorpus - token-based | |
| analysis | |
| file: resources/reference_lists/en/COCA_magazine_unigram_list.csv | |
| format: tsv | |
| columns: | |
| word: 0 | |
| frequency: 1 | |
| normalized_freq: 2 | |
| range: 3 | |
| dispersion: 4 | |
| has_header: false | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| - dispersion | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| measure_classifications: | |
| frequency: frequency | |
| normalized_freq: frequency | |
| range: range | |
| dispersion: range | |
| COCA_magazine_frequency_lemma: | |
| display_name: COCA Magazine Frequency (Lemma) | |
| description: Frequency and range data from COCA magazine subcorpus - lemma-based | |
| analysis | |
| file: resources/reference_lists/en/COCA_magazine_unigram_list.csv | |
| format: tsv | |
| columns: | |
| has_header: false | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| - dispersion | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| measure_classifications: | |
| frequency: frequency | |
| normalized_freq: frequency | |
| range: range | |
| dispersion: range | |
| COCA_spoken_frequency_token: | |
| display_name: COCA Spoken Frequency (Token) | |
| description: Frequency and range data from COCA spoken subcorpus - token-based | |
| analysis | |
| file: resources/reference_lists/en/COCA_spoken_unigram_list.csv | |
| format: tsv | |
| columns: | |
| word: 0 | |
| frequency: 1 | |
| normalized_freq: 2 | |
| range: 3 | |
| dispersion: 4 | |
| has_header: false | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| - dispersion | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| COCA_spoken_frequency_lemma: | |
| display_name: COCA Spoken Frequency (Lemma) | |
| description: Frequency and range data from COCA spoken subcorpus - lemma-based | |
| analysis | |
| file: resources/reference_lists/en/COCA_spoken_unigram_list.csv | |
| format: tsv | |
| columns: | |
| word: 0 | |
| frequency: 1 | |
| normalized_freq: 2 | |
| range: 3 | |
| dispersion: 4 | |
| has_header: false | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| - dispersion | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| # Psycholinguistic norms | |
| concreteness_ratings_token: | |
| display_name: Concreteness Ratings (Token) | |
| description: Concreteness ratings for English words (1-5 scale) - token-based | |
| analysis | |
| file: resources/reference_lists/en/Concreteness_Brysbaert.txt | |
| format: tsv | |
| columns: | |
| word: 0 | |
| concreteness: 1 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: [] | |
| selectable_measures: | |
| - concreteness | |
| default_measures: | |
| - concreteness | |
| default_log_transforms: [] | |
| measure_classifications: | |
| concreteness: psycholinguistic | |
| header_prefix: '#' | |
| concreteness_ratings_lemma: | |
| display_name: Concreteness Ratings (Lemma) | |
| description: Concreteness ratings for English words (1-5 scale) - lemma-based | |
| analysis | |
| file: resources/reference_lists/en/Concreteness_Brysbaert.txt | |
| format: tsv | |
| columns: | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: [] | |
| selectable_measures: | |
| - concreteness | |
| default_measures: | |
| - concreteness | |
| default_log_transforms: [] | |
| measure_classifications: | |
| concreteness: psycholinguistic | |
| header_prefix: '#' | |
| aoa_ratings_token: | |
| display_name: Age of Acquisition (AOA) Ratings (Lemma) | |
| description: Age of Acquisition (AOA) ratings for English words - lemma-based | |
| analysis | |
| file: resources/reference_lists/en/AoA_Brysbart.txt | |
| format: tsv | |
| columns: | |
| word: 0 | |
| AOA: 1 | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: [] | |
| selectable_measures: | |
| - AOA | |
| default_measures: | |
| - AOA | |
| default_log_transforms: [] | |
| measure_classifications: | |
| AOA: AOA | |
| header_prefix: '#' | |
| aoe_ratings_lemma: | |
| display_name: Age of Exposure (AOE) (Lemma) | |
| description: Age of Exposure (AOE) based on K-12 textbooks - lemma-based | |
| analysis | |
| file: resources/reference_lists/en/AOE_words_dictionary.csv | |
| format: tsv | |
| columns: | |
| word: 0 | |
| InverseAverage: 1 | |
| InverseLinearRegressionSlope: 2 | |
| IndexAboveThreshold40: 3 | |
| InflectionPointPolynomial: 4 | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: [] | |
| selectable_measures: | |
| - InverseAverage | |
| - InverseLinearRegressionSlope | |
| - IndexAboveThreshold40 | |
| - InflectionPointPolynomial | |
| default_measures: | |
| - InflectionPointPolynomial | |
| measure_classifications: | |
| InverseAverage: psycholinguistic | |
| InverseLinearRegressionSlope: psycholinguistic | |
| IndexAboveThreshold40: psycholinguistic | |
| InflectionPointPolynomial: psycholinguistic | |
| header_prefix: '#' | |
| default_log_transforms: [] | |
| measure_classifications: | |
| InflectionPointPolynomial: AOE | |
| IndexAboveThreshold40: AOE | |
| header_prefix: '#' | |
| semd_token: | |
| display_name: SemD (Token) | |
| description: Semantic Diversity (SemD) for English words - token-based | |
| analysis | |
| file: resources/reference_lists/en/SemD.txt | |
| format: tsv | |
| columns: | |
| word: 0 | |
| semd: 1 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: [] | |
| selectable_measures: | |
| - semd | |
| default_measures: | |
| - semd | |
| default_log_transforms: [] | |
| measure_classifications: | |
| semd: contextual distinctiveness | |
| header_prefix: '#' | |
| mcd_cd_token: | |
| display_name: McD CD (Token) | |
| description: McDonald Contextual Diversity (McD CD) for English words - token-based | |
| analysis | |
| file: resources/reference_lists/en/Mcd_CD.txt | |
| format: tsv | |
| columns: | |
| word: 0 | |
| mcd: 1 | |
| has_header: false | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: [] | |
| selectable_measures: | |
| - mcd | |
| default_measures: | |
| - mcd | |
| measure_classifications: | |
| mcd: contextual_diversity | |
| header_prefix: '#' | |
| academic_words_token: | |
| display_name: Academic Word List (Token) | |
| description: Common academic vocabulary for research writing - token-based analysis | |
| file: resources/reference_lists/en/academic_words_token.csv | |
| format: csv | |
| columns: | |
| word: 0 | |
| frequency: 1 | |
| has_header: true | |
| enabled: false | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| default_measures: | |
| - frequency | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| academic_words_lemma: | |
| display_name: Academic Word List (Lemma) | |
| description: Common academic vocabulary for research writing - lemma-based analysis | |
| file: resources/reference_lists/en/academic_words_lemma.csv | |
| format: csv | |
| columns: | |
| has_header: true | |
| enabled: false | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| default_measures: | |
| - frequency | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| bigrams: | |
| COCA_spoken_bigram_frequency_token: | |
| display_name: COCA Spoken Bigram Frequency (Token) | |
| description: Bigram frequencies and range data - token-based analysis | |
| file: resources/reference_lists/en/COCA_spoken_bigram_list.csv | |
| format: tsv | |
| columns: | |
| bigram: 0 | |
| frequency: 1 | |
| normalized_freq: 2 | |
| documents: 3 | |
| range: 4 | |
| has_header: false | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - documents | |
| - range | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| measure_classifications: | |
| frequency: frequency | |
| normalized_freq: frequency | |
| documents: range | |
| range: range | |
| COCA_spoken_bigram_frequency_lemma: | |
| display_name: COCA Spoken Bigram Frequency (Lemma) | |
| description: Bigram frequencies and range data - lemma-based analysis | |
| file: resources/reference_lists/en/COCA_spoken_bigram_list.csv | |
| format: tsv | |
| columns: | |
| has_header: false | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - documents | |
| - range | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| measure_classifications: | |
| frequency: frequency | |
| normalized_freq: frequency | |
| documents: range | |
| range: range | |
| COCA_spoken_bigram_association_token: | |
| display_name: COCA Spoken Bigram Associations (Token) | |
| description: Bigram association measures (MI, T-score, Delta P) - token-based | |
| analysis | |
| file: resources/reference_lists/en/spoken_bi_contingency.csv | |
| format: csv | |
| columns: | |
| bigram: 0 | |
| frequency: 1 | |
| mi_score: 2 | |
| mi_2_score: 3 | |
| t_score: 4 | |
| delta_p: 5 | |
| ap_collex: 6 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| - mi_score | |
| - mi_2_score | |
| - t_score | |
| - delta_p | |
| - ap_collex | |
| default_measures: | |
| - frequency | |
| - t_score | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| mi_score: association | |
| mi_2_score: association | |
| t_score: association | |
| delta_p: association | |
| ap_collex: association | |
| COCA_spoken_bigram_association_lemma: | |
| display_name: COCA Spoken Bigram Associations (Lemma) | |
| description: Bigram association measures (MI, T-score, Delta P) - lemma-based | |
| analysis | |
| file: resources/reference_lists/en/spoken_bigram_lemma_contingency.csv | |
| format: csv | |
| columns: | |
| bigram: 0 | |
| frequency: 1 | |
| mi_score: 5 | |
| mi_2_score: 6 | |
| t_score: 7 | |
| delta_p: 8 | |
| ap_collex: 9 | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| - mi_score | |
| - mi_2_score | |
| - t_score | |
| - delta_p | |
| - ap_collex | |
| default_measures: | |
| - frequency | |
| - t_score | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| mi_score: association | |
| mi_2_score: association | |
| t_score: association | |
| delta_p: association | |
| ap_collex: association | |
| COCA_magazine_bigram_frequency_token: | |
| display_name: COCA Magazine Bigram Frequency (Token) | |
| description: Bigram frequencies and range data in Magazine - token-based analysis | |
| file: resources/reference_lists/en/COCA_magazine_bigram_list.csv | |
| format: tsv | |
| columns: | |
| bigram: 0 | |
| frequency: 1 | |
| normalized_freq: 2 | |
| documents: 3 | |
| range: 4 | |
| has_header: false | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - documents | |
| - range | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| measure_classifications: | |
| frequency: frequency | |
| normalized_freq: frequency | |
| documents: range | |
| range: range | |
| COCA_magazine_bigram_frequency_lemma: | |
| display_name: COCA Magazine Bigram Frequency (Lemma) | |
| description: Bigram frequencies and range data in Magazine - lemma-based analysis | |
| file: resources/reference_lists/en/COCA_magazine_bigram_list.csv | |
| format: tsv | |
| columns: | |
| has_header: false | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - documents | |
| - range | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| measure_classifications: | |
| frequency: frequency | |
| normalized_freq: frequency | |
| documents: range | |
| range: range | |
| COCA_magazine_bigram_association_token: | |
| display_name: COCA Magazine Bigram Associations (Token) | |
| description: Bigram association measures (MI, T-score, Delta P) - token-based | |
| analysis | |
| file: resources/reference_lists/en/magazine_bi_contingency.csv | |
| format: csv | |
| columns: | |
| bigram: 0 | |
| frequency: 1 | |
| mi_score: 2 | |
| mi_2_score: 3 | |
| t_score: 4 | |
| delta_p: 5 | |
| ap_collex: 6 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| - mi_score | |
| - mi_2_score | |
| - t_score | |
| - delta_p | |
| - ap_collex | |
| default_measures: | |
| - frequency | |
| - t_score | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| mi_score: association | |
| mi_2_score: association | |
| t_score: association | |
| delta_p: association | |
| ap_collex: association | |
| COCA_magazine_bigram_association_lemma: | |
| display_name: COCA Magazine Bigram Associations (Lemma) | |
| description: Bigram association measures (MI, T-score, Delta P) - lemma-based | |
| analysis | |
| file: resources/reference_lists/en/magazine_bigram_lemma_contingency.csv | |
| format: csv | |
| columns: | |
| has_header: true | |
| enabled: false | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| - mi_score | |
| - mi_2_score | |
| - t_score | |
| - delta_p | |
| - ap_collex | |
| default_measures: | |
| - frequency | |
| - t_score | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| mi_score: association | |
| mi_2_score: association | |
| t_score: association | |
| delta_p: association | |
| ap_collex: association | |
| trigrams: | |
| COCA_trigram_frequency_token: | |
| display_name: COCA Trigram Frequency (Token) | |
| description: Trigram frequencies and range data - token-based analysis | |
| file: resources/reference_lists/en/COCA_spoken_trigram_list.csv | |
| format: tsv | |
| columns: | |
| trigram: 0 | |
| frequency: 1 | |
| normalized_freq: 2 | |
| range: 3 | |
| dispersion: 4 | |
| has_header: false | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| - dispersion | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| measure_classifications: | |
| frequency: frequency | |
| normalized_freq: frequency | |
| range: range | |
| dispersion: range | |
| COCA_trigram_frequency_lemma: | |
| display_name: COCA Trigram Frequency (Lemma) | |
| description: Trigram frequencies and range data - lemma-based analysis | |
| file: resources/reference_lists/en/COCA_spoken_trigram_list.csv | |
| format: tsv | |
| columns: | |
| has_header: false | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| - normalized_freq | |
| selectable_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| - dispersion | |
| default_measures: | |
| - frequency | |
| - normalized_freq | |
| - range | |
| default_log_transforms: | |
| - frequency | |
| - normalized_freq | |
| measure_classifications: | |
| frequency: frequency | |
| normalized_freq: frequency | |
| range: range | |
| dispersion: range | |
| COCA_trigram_assoc_uni_bi_token: | |
| display_name: COCA Trigram→Bigram Associations (Token) | |
| description: Trigram to bigram association measures - token-based analysis | |
| file: resources/reference_lists/en/spoken_tri_contingency_1.csv | |
| format: csv | |
| columns: | |
| trigram: 0 | |
| frequency: 1 | |
| mi_score: 2 | |
| mi_2_score: 3 | |
| t_score: 4 | |
| delta_p: 5 | |
| ap_collex: 6 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| - mi_score | |
| - mi_2_score | |
| - t_score | |
| - delta_p | |
| - ap_collex | |
| default_measures: | |
| - frequency | |
| - t_score | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| mi_score: association | |
| mi_2_score: association | |
| t_score: association | |
| delta_p: association | |
| ap_collex: association | |
| COCA_trigram_assoc_uni_bi_lemma: | |
| display_name: COCA Trigram→Bigram Associations (Lemma) | |
| description: Trigram to bigram association measures - lemma-based analysis | |
| file: resources/reference_lists/en/spoken_trigram_lemma_contingency_1.csv | |
| format: csv | |
| columns: | |
| has_header: true | |
| enabled: false | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| - mi_score | |
| - mi_2_score | |
| - t_score | |
| - delta_p | |
| - ap_collex | |
| default_measures: | |
| - frequency | |
| - t_score | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| mi_score: association | |
| mi_2_score: association | |
| t_score: association | |
| delta_p: association | |
| ap_collex: association | |
| COCA_trigram_assoc_bi_uni_token: | |
| display_name: COCA Trigram→Unigram Associations (Token) | |
| description: Trigram to unigram association measures - token-based analysis | |
| file: resources/reference_lists/en/spoken_tri_contingency_2.csv | |
| format: csv | |
| columns: | |
| trigram: 0 | |
| frequency: 1 | |
| mi_score: 2 | |
| mi_2_score: 3 | |
| t_score: 4 | |
| delta_p: 5 | |
| ap_collex: 6 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| - mi_score | |
| - mi_2_score | |
| - t_score | |
| - delta_p | |
| - ap_collex | |
| default_measures: | |
| - frequency | |
| - t_score | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| mi_score: association | |
| mi_2_score: association | |
| t_score: association | |
| delta_p: association | |
| ap_collex: association | |
| COCA_trigram_assoc_bi_uni_lemma: | |
| display_name: COCA Trigram→Unigram Associations (Lemma) | |
| description: Trigram to unigram association measures - lemma-based analysis | |
| file: resources/reference_lists/en/spoken_trigram_lemma_contingency_2.csv | |
| format: csv | |
| columns: | |
| has_header: true | |
| enabled: false | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| - mi_score | |
| - mi_2_score | |
| - t_score | |
| - delta_p | |
| - ap_collex | |
| default_measures: | |
| - frequency | |
| - t_score | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| mi_score: association | |
| mi_2_score: association | |
| t_score: association | |
| delta_p: association | |
| ap_collex: association | |
| japanese: | |
| unigrams: | |
| BCCWJ_frequency_token: | |
| display_name: BCCWJ Written - Frequency (Token) | |
| description: BCCWJ raw frequency counts for written Japanese - token-based analysis | |
| file: resources/reference_lists/ja/BCCWJ_frequencylist_suw_ver1_1.tsv | |
| format: tsv | |
| columns: | |
| surface_form: 1 | |
| lemma: 2 | |
| pos: 3 | |
| frequency: 6 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| BCCWJ_frequency_lemma: | |
| display_name: BCCWJ Written - Frequency (Lemma) | |
| description: BCCWJ raw frequency counts for written Japanese - lemma-based analysis | |
| file: resources/reference_lists/ja/BCCWJ_frequencylist_suw_ver1_1.tsv | |
| format: tsv | |
| columns: | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| BCCWJ_pmw_token: | |
| display_name: BCCWJ Written - Per Million Words (Token) | |
| description: BCCWJ normalized frequency for written Japanese - token-based analysis | |
| file: resources/reference_lists/ja/BCCWJ_frequencylist_suw_ver1_1.tsv | |
| format: tsv | |
| columns: | |
| surface_form: 1 | |
| lemma: 2 | |
| pos: 3 | |
| frequency: 7 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| BCCWJ_pmw_lemma: | |
| display_name: BCCWJ Written - Per Million Words (Lemma) | |
| description: BCCWJ normalized frequency for written Japanese - lemma-based analysis | |
| file: resources/reference_lists/ja/BCCWJ_frequencylist_suw_ver1_1.tsv | |
| format: tsv | |
| columns: | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| BCCWJ_rank_token: | |
| display_name: BCCWJ Written - Frequency Rank (Token) | |
| description: BCCWJ frequency ranking for written Japanese - token-based analysis | |
| file: resources/reference_lists/ja/BCCWJ_frequencylist_suw_ver1_1.tsv | |
| format: tsv | |
| columns: | |
| surface_form: 1 | |
| lemma: 2 | |
| pos: 3 | |
| frequency: 0 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| BCCWJ_rank_lemma: | |
| display_name: BCCWJ Written - Frequency Rank (Lemma) | |
| description: BCCWJ frequency ranking for written Japanese - lemma-based analysis | |
| file: resources/reference_lists/ja/BCCWJ_frequencylist_suw_ver1_1.tsv | |
| format: tsv | |
| columns: | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| CSJ_frequency_token: | |
| display_name: CSJ Spoken - Frequency (Token) | |
| description: CSJ raw frequency counts for spoken Japanese - token-based analysis | |
| file: resources/reference_lists/ja/CSJ_frequencylist_suw_ver201803.tsv | |
| format: tsv | |
| columns: | |
| surface_form: 1 | |
| lemma: 2 | |
| pos: 3 | |
| frequency: 6 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| CSJ_frequency_lemma: | |
| display_name: CSJ Spoken - Frequency (Lemma) | |
| description: CSJ raw frequency counts for spoken Japanese - lemma-based analysis | |
| file: resources/reference_lists/ja/CSJ_frequencylist_suw_ver201803.tsv | |
| format: tsv | |
| columns: | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| CSJ_pmw_token: | |
| display_name: CSJ Spoken - Per Million Words (Token) | |
| description: CSJ normalized frequency for spoken Japanese - token-based analysis | |
| file: resources/reference_lists/ja/CSJ_frequencylist_suw_ver201803.tsv | |
| format: tsv | |
| columns: | |
| surface_form: 1 | |
| lemma: 2 | |
| pos: 3 | |
| frequency: 7 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| CSJ_pmw_lemma: | |
| display_name: CSJ Spoken - Per Million Words (Lemma) | |
| description: CSJ normalized frequency for spoken Japanese - lemma-based analysis | |
| file: resources/reference_lists/ja/CSJ_frequencylist_suw_ver201803.tsv | |
| format: tsv | |
| columns: | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| CSJ_rank_token: | |
| display_name: CSJ Spoken - Frequency Rank (Token) | |
| description: CSJ frequency ranking for spoken Japanese - token-based analysis | |
| file: resources/reference_lists/ja/CSJ_frequencylist_suw_ver201803.tsv | |
| format: tsv | |
| columns: | |
| surface_form: 1 | |
| lemma: 2 | |
| pos: 3 | |
| frequency: 0 | |
| has_header: true | |
| enabled: true | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| CSJ_rank_lemma: | |
| display_name: CSJ Spoken - Frequency Rank (Lemma) | |
| description: CSJ frequency ranking for spoken Japanese - lemma-based analysis | |
| file: resources/reference_lists/ja/CSJ_frequencylist_suw_ver201803.tsv | |
| format: tsv | |
| columns: | |
| has_header: true | |
| enabled: true | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - pos | |
| - frequency | |
| default_measures: | |
| - frequency | |
| - pos | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| pos: unknown | |
| frequency: frequency | |
| japanese_corpus: true | |
| jp_frequency_token: | |
| display_name: Japanese Frequency List (Token) | |
| description: Frequency data for Japanese words - token-based analysis | |
| file: resources/reference_lists/ja/jp_frequency_token.csv | |
| format: csv | |
| columns: | |
| word: 0 | |
| frequency: 1 | |
| has_header: true | |
| enabled: false | |
| analysis_type: token | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| default_measures: | |
| - frequency | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |
| jp_frequency_lemma: | |
| display_name: Japanese Frequency List (Lemma) | |
| description: Frequency data for Japanese words - lemma-based analysis | |
| file: resources/reference_lists/ja/jp_frequency_lemma.csv | |
| format: csv | |
| columns: | |
| has_header: true | |
| enabled: false | |
| analysis_type: lemma | |
| log_transformable: | |
| - frequency | |
| selectable_measures: | |
| - frequency | |
| default_measures: | |
| - frequency | |
| default_log_transforms: | |
| - frequency | |
| measure_classifications: | |
| frequency: frequency | |