sbompolas committed on
Commit
9391975
·
verified ·
1 Parent(s): 45f8fa5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -209
app.py CHANGED
@@ -1,210 +1,4 @@
1
- # Create Gradio interface
2
- def create_gradio_app():
3
- with gr.Blocks(title="Lesbian Greek Morphosyntactic Parser", theme=gr.themes.Soft()) as app:
4
- gr.Markdown("""
5
- # Lesbian Greek Morphosyntactic Parser
6
-
7
- This tool uses custom Stanza models trained specifically for the **Lesbian dialect of Greek**
8
- (spoken on the island of Lesbos). Choose between two different model variants:
9
-
10
- - **Lesbian-only**: Trained exclusively on Lesbian dialect data
11
- - **Lesbian-synthetic-data**: Enhanced with synthetic data from NGUD for improved coverage
12
-
13
- ## Model Features
14
-
15
- Both models provide:
16
- - **Tokenization**: Splits text into tokens
17
- - **POS Tagging**: Part-of-speech classification
18
- - **Lemmatization**: Base form identification
19
- - **Dependency Parsing**: Syntactic relationship analysis
20
- - **CoNLL-U Output**: Standard linguistic annotation format
21
-
22
- ## About the Models
23
-
24
- The **Lesbian-only** model was trained on a curated treebank of 540 sentences from both oral and written
25
- sources collected from various villages of Lesbos, including Agra, Chidira, Eressos,
26
- Pterounta, Mesotopos, and Parakoila.
27
-
28
- The **Lesbian-synthetic-data** model enhances the original with additional synthetic data
29
- generated using NGUD (Neural Grammatical Universal Dependencies) techniques.
30
-
31
- **Citation**: Bompolas, S., Markantonatou, S., Ralli, A., & Anastasopoulos, A. (2025).
32
- Crossing Dialectal Boundaries: Building a Treebank for the Dialect of Lesbos through
33
- Knowledge Transfer from Standard Modern Greek.
34
-
35
- Enter your Lesbian Greek text below to get started!
36
- """)
37
-
38
- # Show model status for both models
39
- any_model_loaded = LESBIAN_ONLY_MODEL is not None or LESBIAN_SYNTHETIC_MODEL is not None
40
-
41
- if any_model_loaded:
42
- status_html = "### Model Status\n\n"
43
- for model_name, result in model_results.items():
44
- if "✅" in result:
45
- status_html += f"- **{model_name.replace('-', ' ').title()}**: {result}\n"
46
- elif "⚠️" in result:
47
- status_html += f"- **{model_name.replace('-', ' ').title()}**: {result}\n"
48
- else:
49
- status_html += f"- **{model_name.replace('-', ' ').title()}**: ❌ {result}\n"
50
- gr.Markdown(status_html)
51
- else:
52
- gr.Markdown("""
53
- ❌ **Model Loading Error**: No models could be loaded.
54
-
55
- This may be due to:
56
- - Network issues downloading the models
57
- - Missing dependencies (transformers library)
58
- - Insufficient memory or storage
59
- - Model compatibility issues
60
-
61
- Please try refreshing the page or contact the developers.
62
- """)
63
-
64
- with gr.Row():
65
- with gr.Column():
66
- # Model selection
67
- model_selector = gr.Dropdown(
68
- choices=[
69
- ("Lesbian-only (Original)", "lesbian-only"),
70
- ("Lesbian-synthetic-data (Enhanced)", "lesbian-synthetic-data")
71
- ],
72
- value="lesbian-only",
73
- label="Select Model",
74
- info="Choose which Lesbian Greek model to use for parsing",
75
- interactive=any_model_loaded
76
- )
77
-
78
- text_input = gr.Textbox(
79
- label="Lesbian Greek Text Input",
80
- placeholder="Εισάγετε το κείμενο στη Λεσβιακή διάλεκτο..." if any_model_loaded else "Models not loaded - please refresh page",
81
- lines=4,
82
- value="Τα παιδιά πάντ στο κήπ." if any_model_loaded else "",
83
- interactive=any_model_loaded
84
- )
85
-
86
- parse_button = gr.Button(
87
- "Parse Lesbian Greek Text",
88
- variant="primary",
89
- size="lg",
90
- interactive=any_model_loaded
91
- )
92
-
93
- with gr.Row():
94
- with gr.Column():
95
- gr.Markdown("### Interactive Dependency Tree")
96
- dependency_tree_viz = gr.HTML(
97
- label="Visual Dependency Tree",
98
- value="<p>Enter text and click parse to see the dependency tree visualization</p>"
99
- )
100
-
101
- with gr.Row():
102
- with gr.Column():
103
- gr.Markdown("### CoNLL-U Output")
104
- conllu_output = gr.Textbox(
105
- label="CoNLL-U Format",
106
- lines=10,
107
- max_lines=20,
108
- show_copy_button=True,
109
- info="Raw CoNLL-U format output optimized for Lesbian Greek dialect"
110
- )
111
-
112
- with gr.Row():
113
- with gr.Column():
114
- gr.Markdown("### Parsed Data Table")
115
- data_table = gr.Dataframe(
116
- label="Token Analysis",
117
- interactive=False,
118
- wrap=True
119
- )
120
-
121
- with gr.Row():
122
- with gr.Column():
123
- gr.Markdown("### Text-based Dependency Structure")
124
- dependency_viz = gr.Textbox(
125
- label="Dependency Relationships",
126
- lines=8,
127
- max_lines=15,
128
- show_copy_button=True,
129
- info="Text-based visualization of syntactic dependencies"
130
- )
131
-
132
- # Event handling
133
- if any_model_loaded:
134
- parse_button.click(
135
- fn=process_text,
136
- inputs=[text_input, model_selector],
137
- outputs=[conllu_output, data_table, dependency_viz, dependency_tree_viz]
138
- )
139
-
140
- # Also trigger on Enter in text input
141
- text_input.submit(
142
- fn=process_text,
143
- inputs=[text_input, model_selector],
144
- outputs=[conllu_output, data_table, dependency_viz, dependency_tree_viz]
145
- )
146
-
147
- # Add Lesbian Greek examples (if available)
148
- if any_model_loaded:
149
- gr.Markdown("### Example Lesbian Greek Texts")
150
- examples = [
151
- ["Τα παιδιά πάντ στο κήπ.", "lesbian-only"],
152
- ["Η γάτα κάθεται στο τραπέζ.", "lesbian-only"],
153
- ["Ο ήλιος λάμπει στον ουρανό.", "lesbian-synthetic-data"],
154
- ["Η θάλασσα είναι γαλάζια και όμορφη.", "lesbian-synthetic-data"],
155
- ]
156
-
157
- gr.Examples(
158
- examples=examples,
159
- inputs=[text_input, model_selector],
160
- outputs=[conllu_output, data_table, dependency_viz, dependency_tree_viz],
161
- fn=process_text,
162
- cache_examples=False
163
- )
164
-
165
- gr.Markdown("""
166
- ### Model Comparison
167
-
168
- - **Lesbian-only**: Pure dialect model trained exclusively on authentic Lesbian Greek data
169
- - **Lesbian-synthetic-data**: Enhanced model with synthetic data augmentation for better coverage
170
-
171
- ### Visualization Legend
172
-
173
- The **Interactive Dependency Tree** shows:
174
- - **Words** in bold at the bottom with their position numbers
175
- - **POS tags** in gray below each word
176
- - **Dependency arcs** as curved lines with arrows pointing to heads
177
- - **Dependency relations** labeled on the arcs with white backgrounds
178
- - **Color coding** for different dependency types:
179
- - Black: ROOT relations
180
- - Blue: Subject relations (nsubj)
181
- - Green: Object relations (obj)
182
- - Orange: Determiners (det)
183
- - Purple: Adjective modifiers (amod)
184
- - And more...
185
-
186
- ### About CoNLL-U Format
187
-
188
- The CoNLL-U format includes these fields for each token:
189
- - **ID**: Token index
190
- - **FORM**: Word form or punctuation symbol
191
- - **LEMMA**: Lemma or stem of word form
192
- - **UPOS**: Universal part-of-speech tag
193
- - **XPOS**: Language-specific part-of-speech tag
194
- - **FEATS**: Morphological features
195
- - **HEAD**: Head of the current word
196
- - **DEPREL**: Dependency relation to the head
197
- - **DEPS**: Enhanced dependency graph
198
- - **MISC**: Miscellaneous annotations
199
-
200
- ### Resources
201
- - [Original Lesbian Greek Models](https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model)
202
- - [Enhanced NGUD-Lesbian Models](https://huggingface.co/sbompolas/NGUD-Lesbian-Morphosyntactic-Model)
203
- - [UD_Greek-Lesbian Treebank](https://github.com/UniversalDependencies/UD_Greek-Lesbian)
204
- - [Stanza Documentation](https://stanfordnlp.github.io/stanza/)
205
- """)
206
-
207
- return appimport gradio as gr
208
  import stanza
209
  import pandas as pd
210
  import sys
@@ -1090,5 +884,4 @@ if __name__ == "__main__":
1090
  print("Creating Gradio app...")
1091
  app = create_gradio_app()
1092
  print("Launching app...")
1093
- app.launch()
1094
-
 
1
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import stanza
3
  import pandas as pd
4
  import sys
 
884
  print("Creating Gradio app...")
885
  app = create_gradio_app()
886
  print("Launching app...")
887
+ app.launch()