Spaces:
Building
Building
tag set
Browse files- test_column_matching.py → test/test_column_matching.py +0 -0
- test_csv_comma_handling.py → test/test_csv_comma_handling.py +0 -0
- test_plot_fix.py → test/test_plot_fix.py +0 -0
- test_reference_loading_issue.py → test/test_reference_loading_issue.py +0 -0
- web_app/app.py +1 -1
- web_app/components/ui_components.py +7 -7
- web_app/handlers/pos_handlers.py +26 -0
test_column_matching.py → test/test_column_matching.py
RENAMED
|
File without changes
|
test_csv_comma_handling.py → test/test_csv_comma_handling.py
RENAMED
|
File without changes
|
test_plot_fix.py → test/test_plot_fix.py
RENAMED
|
File without changes
|
test_reference_loading_issue.py → test/test_reference_loading_issue.py
RENAMED
|
File without changes
|
web_app/app.py
CHANGED
|
@@ -48,7 +48,7 @@ def main():
|
|
| 48 |
# Route to appropriate interface
|
| 49 |
if tool_choice == 'Lexical Sophistication':
|
| 50 |
render_lexical_sophistication_interface()
|
| 51 |
-
elif tool_choice == 'POS Parser':
|
| 52 |
render_pos_parser_interface()
|
| 53 |
elif tool_choice == 'Corpus Data Visualizer':
|
| 54 |
render_corpus_visualization_interface()
|
|
|
|
| 48 |
# Route to appropriate interface
|
| 49 |
if tool_choice == 'Lexical Sophistication':
|
| 50 |
render_lexical_sophistication_interface()
|
| 51 |
+
elif tool_choice == 'POS & Dependency Parser':
|
| 52 |
render_pos_parser_interface()
|
| 53 |
elif tool_choice == 'Corpus Data Visualizer':
|
| 54 |
render_corpus_visualization_interface()
|
web_app/components/ui_components.py
CHANGED
|
@@ -106,9 +106,9 @@ class UIComponents:
|
|
| 106 |
st.subheader("SpaCy Model")
|
| 107 |
new_model_size = st.selectbox(
|
| 108 |
"Model Size",
|
| 109 |
-
options=['
|
| 110 |
format_func=lambda x: 'Transformer (trf)' if x == 'trf' else 'Medium (md)',
|
| 111 |
-
index=0 if st.session_state.model_size == '
|
| 112 |
)
|
| 113 |
|
| 114 |
# Only update if changed
|
|
@@ -122,7 +122,7 @@ class UIComponents:
|
|
| 122 |
st.subheader("Analysis Tools")
|
| 123 |
return st.radio(
|
| 124 |
"Select Tool",
|
| 125 |
-
options=['Lexical Sophistication', 'POS Parser', 'Frequency Analysis', 'Corpus Data Visualizer'],
|
| 126 |
key='tool_choice'
|
| 127 |
)
|
| 128 |
|
|
@@ -195,9 +195,9 @@ class UIComponents:
|
|
| 195 |
col1, col2 = st.columns(2)
|
| 196 |
|
| 197 |
with col1:
|
| 198 |
-
token_analysis = st.checkbox("
|
| 199 |
with col2:
|
| 200 |
-
lemma_analysis = st.checkbox("
|
| 201 |
|
| 202 |
# Global Options
|
| 203 |
st.write("### ⚙️ Global Options")
|
|
@@ -319,7 +319,7 @@ class UIComponents:
|
|
| 319 |
# Group-level enable/disable checkbox
|
| 320 |
group_key = f"group_enabled_{base_name}"
|
| 321 |
group_enabled = st.checkbox(
|
| 322 |
-
f"
|
| 323 |
value=True, # Default enabled
|
| 324 |
key=group_key,
|
| 325 |
help=f"Enable/disable all {base_name} analyses"
|
|
@@ -424,7 +424,7 @@ class UIComponents:
|
|
| 424 |
# Measure checkbox (pre-selected based on defaults)
|
| 425 |
measure_key = f"measure_{entry_name}_{measure}"
|
| 426 |
selected = st.checkbox(
|
| 427 |
-
f"
|
| 428 |
value=measure in st.session_state[f'custom_measures_{entry_name}'],
|
| 429 |
key=measure_key,
|
| 430 |
help=f"Include {measure} in analysis"
|
|
|
|
| 106 |
st.subheader("SpaCy Model")
|
| 107 |
new_model_size = st.selectbox(
|
| 108 |
"Model Size",
|
| 109 |
+
options=['md', 'trf'],
|
| 110 |
format_func=lambda x: 'Transformer (trf)' if x == 'trf' else 'Medium (md)',
|
| 111 |
+
index=0 if st.session_state.model_size == 'md' else 1
|
| 112 |
)
|
| 113 |
|
| 114 |
# Only update if changed
|
|
|
|
| 122 |
st.subheader("Analysis Tools")
|
| 123 |
return st.radio(
|
| 124 |
"Select Tool",
|
| 125 |
+
options=['Lexical Sophistication', 'POS & Dependency Parser', 'Frequency Analysis', 'Corpus Data Visualizer'],
|
| 126 |
key='tool_choice'
|
| 127 |
)
|
| 128 |
|
|
|
|
| 195 |
col1, col2 = st.columns(2)
|
| 196 |
|
| 197 |
with col1:
|
| 198 |
+
token_analysis = st.checkbox("Token-based", value=True, key="token_analysis_enabled")
|
| 199 |
with col2:
|
| 200 |
+
lemma_analysis = st.checkbox("Lemma-based", value=True, key="lemma_analysis_enabled")
|
| 201 |
|
| 202 |
# Global Options
|
| 203 |
st.write("### ⚙️ Global Options")
|
|
|
|
| 319 |
# Group-level enable/disable checkbox
|
| 320 |
group_key = f"group_enabled_{base_name}"
|
| 321 |
group_enabled = st.checkbox(
|
| 322 |
+
f"**{base_name}**",
|
| 323 |
value=True, # Default enabled
|
| 324 |
key=group_key,
|
| 325 |
help=f"Enable/disable all {base_name} analyses"
|
|
|
|
| 424 |
# Measure checkbox (pre-selected based on defaults)
|
| 425 |
measure_key = f"measure_{entry_name}_{measure}"
|
| 426 |
selected = st.checkbox(
|
| 427 |
+
f"{measure.replace('_', ' ').title()}",
|
| 428 |
value=measure in st.session_state[f'custom_measures_{entry_name}'],
|
| 429 |
key=measure_key,
|
| 430 |
help=f"Include {measure} in analysis"
|
web_app/handlers/pos_handlers.py
CHANGED
|
@@ -136,6 +136,32 @@ class POSHandlers:
|
|
| 136 |
mime="text/tab-separated-values"
|
| 137 |
)
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
# Dependency visualization
|
| 140 |
st.write("**Dependency Visualization**")
|
| 141 |
try:
|
|
|
|
| 136 |
mime="text/tab-separated-values"
|
| 137 |
)
|
| 138 |
|
| 139 |
+
with st.expander("## **See Tagset**"):
|
| 140 |
+
# col1, col2, col3 = st.columns(3)
|
| 141 |
+
|
| 142 |
+
st.markdown("""
|
| 143 |
+
|
| 144 |
+
The following table is based on [UD guideline](https://universaldependencies.org/u/dep/index.html).
|
| 145 |
+
|
| 146 |
+
Note that spaCy English model is trained on [ClearNLP tag set](https://github.com/clir/clearnlp-guidelines/blob/master/md/specifications/dependency_labels.md)
|
| 147 |
+
|
| 148 |
+
#### Dependency relations
|
| 149 |
+
|
| 150 |
+
| | **Nominals** | **Clauses** | **Modifier words** | **Function Words** |
|
| 151 |
+
|---------------------------|--------------------|--------------------|--------------------|--------------------|
|
| 152 |
+
| **Core arguments** | `nsubj`, `obj`, `iobj` | `csubj`, `ccomp`, `xcomp` | | |
|
| 153 |
+
| **Non-core dependents** | `obl`, `vocative`, `expl`, `dislocated` | `advcl` | `advmod`, `discourse` | `aux`, `cop`, `mark` |
|
| 154 |
+
| **Nominal dependents** | `nmod`, `appos`, `nummod` | `acl` | `amod` | `det`, `clf`, `case` |
|
| 155 |
+
|
| 156 |
+
#### Additional Relations
|
| 157 |
+
|
| 158 |
+
| **Coordination** | **Headless** | **Loose** | **Special** | **Other** |
|
| 159 |
+
|------------------|-------------|----------------|--------------------|------------------|
|
| 160 |
+
| `conj`, `cc` | `fixed`, `flat` | `list`, `parataxis` | `compound`, `orphan`, `goeswith`, `reparandum` | `punct`, `root`, `dep` |
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
""")
|
| 164 |
+
|
| 165 |
# Dependency visualization
|
| 166 |
st.write("**Dependency Visualization**")
|
| 167 |
try:
|