sbompolas committed on
Commit
8c61d1b
·
verified ·
1 Parent(s): be6e194

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -177
app.py CHANGED
@@ -3,92 +3,122 @@ import stanza
3
  import pandas as pd
4
  import sys
5
  import traceback
6
- from huggingface_hub import hf_hub_download
7
  import os
 
 
 
8
 
9
- # Global variable to store the model
10
  LESBIAN_GREEK_MODEL = None
11
 
12
- def download_and_load_model():
13
- """Download and load the Lesbian Greek model from Hugging Face"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  global LESBIAN_GREEK_MODEL
15
  try:
16
- print("Downloading Lesbian Greek model components from Hugging Face...")
17
-
18
- # Download model files
19
- model_files = {
20
- 'tokenizer': 'tokenizer.pt',
21
- 'lemmatizer': 'lemmatizer.pt',
22
- 'pos': 'pos.pt',
23
- 'depparse': 'depparse.pt'
24
- }
25
 
26
- model_paths = {}
 
 
 
27
 
28
- for component, filename in model_files.items():
29
- try:
30
- print(f"Downloading {filename}...")
31
- model_path = hf_hub_download(
32
- repo_id="sbompolas/Lesbian-Greek-Morphosyntactic-Model",
33
- filename=filename,
34
- cache_dir="./model_cache"
35
- )
36
- model_paths[component] = model_path
37
- print(f"Successfully downloaded {filename}")
38
- except Exception as e:
39
- print(f"Failed to download {filename}: {e}")
40
- return False
41
-
42
- # Initialize the Stanza pipeline with downloaded models
43
- print("Initializing Stanza pipeline with custom models...")
44
- LESBIAN_GREEK_MODEL = stanza.Pipeline(
45
- lang='grc', # Ancient Greek language code
46
- use_gpu=False,
47
- processors='tokenize,lemma,pos,depparse',
48
- tokenize_model_path=model_paths['tokenizer'],
49
- lemma_model_path=model_paths['lemmatizer'],
50
- pos_model_path=model_paths['pos'],
51
- depparse_model_path=model_paths['depparse'],
52
- tokenize_pretokenized=False,
53
- verbose=True
54
- )
55
-
56
- print("Successfully loaded Lesbian Greek Stanza pipeline!")
57
- return True
58
 
59
- except Exception as e:
60
- print(f"Failed to download/load model: {e}")
61
- traceback.print_exc()
62
 
63
- # Fallback: try to initialize with direct URLs
64
- try:
65
- print("Trying direct URL approach...")
66
- tokenize_model_path = 'https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/tokenizer.pt'
67
- lemma_model_path = 'https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/lemmatizer.pt'
68
- pos_model_path = 'https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/pos.pt'
69
- depparse_model_path = 'https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/depparse.pt'
70
-
71
- LESBIAN_GREEK_MODEL = stanza.Pipeline(
72
- lang='grc',
73
- use_gpu=False,
74
- processors='tokenize,lemma,pos,depparse',
75
- tokenize_model_path=tokenize_model_path,
76
- lemma_model_path=lemma_model_path,
77
- pos_model_path=pos_model_path,
78
- depparse_model_path=depparse_model_path,
79
- tokenize_pretokenized=False,
80
- verbose=True
81
- )
82
 
83
- print("Successfully loaded with direct URLs!")
84
- return True
 
85
 
86
- except Exception as e2:
87
- print(f"Direct URL approach also failed: {e2}")
88
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  def stanza_doc_to_conllu(doc) -> str:
91
- """Convert Stanza Document to CoNLL-U format"""
92
  conllu_lines = []
93
 
94
  for sent_idx, sentence in enumerate(doc.sentences):
@@ -117,8 +147,8 @@ def stanza_doc_to_conllu(doc) -> str:
117
 
118
  return "\n".join(conllu_lines)
119
 
120
- def parse_text_with_model(text: str) -> str:
121
- """Parse Lesbian Greek text using Stanza and return CoNLL-U format"""
122
  global LESBIAN_GREEK_MODEL
123
 
124
  if LESBIAN_GREEK_MODEL is None:
@@ -128,11 +158,11 @@ def parse_text_with_model(text: str) -> str:
128
  return "Error: Please enter some text to parse."
129
 
130
  try:
131
- print(f"Processing text: {text[:50]}...")
132
  # Process the text
133
  doc = LESBIAN_GREEK_MODEL(text)
134
 
135
- # Convert to CoNLL-U format
136
  conllu_output = stanza_doc_to_conllu(doc)
137
  print("CoNLL-U conversion successful!")
138
  return conllu_output
@@ -187,15 +217,13 @@ def create_dependency_visualization(df: pd.DataFrame) -> str:
187
  try:
188
  viz_lines = []
189
  viz_lines.append("Dependency Parse Visualization:")
190
- viz_lines.append("=" * 60)
191
 
192
  for _, row in df.iterrows():
193
  word = row['FORM']
194
  pos = row['UPOS']
195
- lemma = row['LEMMA']
196
  deprel = row['DEPREL']
197
  head_id = row['HEAD']
198
- feats = row['FEATS']
199
 
200
  # Find the head word
201
  if head_id != '0': # Not root
@@ -203,47 +231,28 @@ def create_dependency_visualization(df: pd.DataFrame) -> str:
203
  head_idx = int(head_id) - 1
204
  if 0 <= head_idx < len(df):
205
  head_word = df.iloc[head_idx]['FORM']
206
- viz_lines.append(f"{word} [{lemma}] ({pos}) --{deprel}--> {head_word}")
207
  else:
208
- viz_lines.append(f"{word} [{lemma}] ({pos}) --{deprel}--> [OUT_OF_RANGE]")
209
  except (ValueError, IndexError):
210
- viz_lines.append(f"{word} [{lemma}] ({pos}) --{deprel}--> [ERROR]")
211
  else:
212
- viz_lines.append(f"{word} [{lemma}] ({pos}) --{deprel}--> ROOT")
213
-
214
- # Add morphological features if available
215
- if feats != "_" and feats.strip():
216
- viz_lines.append(f" Morphology: {feats}")
217
- viz_lines.append("") # Add blank line for readability
218
 
219
  return "\n".join(viz_lines)
220
 
221
  except Exception as e:
222
  return f"Error creating visualization: {str(e)}"
223
 
224
- def get_model_info():
225
- """Get information about the loaded model"""
226
- global LESBIAN_GREEK_MODEL
227
- if LESBIAN_GREEK_MODEL is None:
228
- return "Model not loaded"
229
-
230
- try:
231
- info = []
232
- info.append(f"Language: {LESBIAN_GREEK_MODEL.lang}")
233
- info.append(f"Processors: {', '.join(LESBIAN_GREEK_MODEL.processors.keys())}")
234
- return " | ".join(info)
235
- except:
236
- return "Model loaded successfully"
237
-
238
  def process_text(text: str):
239
  """Main processing function that returns all outputs"""
240
  if not text.strip():
241
  empty_df = pd.DataFrame()
242
  return "Please enter some Lesbian Greek text to parse.", empty_df, "No data to display"
243
 
244
- # Parse with Stanza
245
  print(f"Starting to process: {text[:30]}...")
246
- conllu_output = parse_text_with_model(text)
247
 
248
  if conllu_output.startswith("Error"):
249
  empty_df = pd.DataFrame()
@@ -267,12 +276,13 @@ def process_text(text: str):
267
  empty_df = pd.DataFrame()
268
  return conllu_output, empty_df, error_msg
269
 
270
- # Initialize model
271
  print("Initializing Lesbian Greek Stanza model...")
272
- model_loaded = download_and_load_model()
273
 
274
  if not model_loaded:
275
- print("WARNING: Could not load Lesbian Greek model! The app will still run but parsing may not work.")
 
276
 
277
  # Create Gradio interface
278
  def create_gradio_app():
@@ -280,36 +290,55 @@ def create_gradio_app():
280
  gr.Markdown("""
281
  # Lesbian Greek Morphosyntactic Parser
282
 
283
- This tool uses the Lesbian Greek Morphosyntactic Model from Hugging Face to parse Ancient Greek text (specifically Lesbian dialect) and provides:
284
- - **CoNLL-U Format Output**: Standard linguistic annotation format
285
- - **Interactive Table**: Browse parsed tokens with linguistic features
286
- - **Dependency Visualization**: Text-based dependency structure display
287
- - **Morphological Analysis**: Detailed morphological features for each token
 
 
 
 
 
288
 
289
- **Model**: [sbompolas/Lesbian-Greek-Morphosyntactic-Model](https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model)
 
 
 
 
 
 
290
 
291
  Enter your Lesbian Greek text below to get started!
292
  """)
293
 
294
- # Model status
295
- with gr.Row():
296
- model_status = gr.Textbox(
297
- label="Model Status",
298
- value=get_model_info(),
299
- interactive=False,
300
- max_lines=1
301
- )
 
 
 
302
 
303
  with gr.Row():
304
  with gr.Column():
305
  text_input = gr.Textbox(
306
  label="Lesbian Greek Text Input",
307
- placeholder="Enter the Lesbian Greek text you want to analyze...",
308
  lines=4,
309
- value="τὰν δ πάντα ταῦτα"
310
  )
311
 
312
- parse_button = gr.Button("Parse Text", variant="primary", size="lg")
 
 
 
 
 
313
 
314
  with gr.Row():
315
  with gr.Column():
@@ -319,7 +348,7 @@ def create_gradio_app():
319
  lines=10,
320
  max_lines=20,
321
  show_copy_button=True,
322
- info="Raw CoNLL-U format output - you can copy this for use in other tools"
323
  )
324
 
325
  with gr.Row():
@@ -333,77 +362,67 @@ def create_gradio_app():
333
 
334
  with gr.Row():
335
  with gr.Column():
336
- gr.Markdown("### Dependency Structure & Morphology")
337
  dependency_viz = gr.Textbox(
338
  label="Dependency Relationships",
339
- lines=12,
340
- max_lines=25,
341
  show_copy_button=True,
342
- info="Text-based visualization of dependency relationships with morphological features"
343
  )
344
 
345
  # Event handling
346
- parse_button.click(
347
- fn=process_text,
348
- inputs=[text_input],
349
- outputs=[conllu_output, data_table, dependency_viz]
350
- )
351
-
352
- # Also trigger on Enter in text input
353
- text_input.submit(
354
- fn=process_text,
355
- inputs=[text_input],
356
- outputs=[conllu_output, data_table, dependency_viz]
357
- )
358
-
359
- # Add example texts
360
- gr.Markdown("### Example Lesbian Greek Texts")
361
- examples = [
362
- ["τὰν δὲ πάντα ταῦτα"],
363
- ["ἔρος δηὖτέ μ᾽ ὀ λυσιμέλης δόνει"],
364
- ["παῖς τάδ᾽ ἀμφὶ πάντα"],
365
- ["κάλλιστον μὲν ἐγὼ φαίμι"],
366
- ["οὐδ᾽ ἴα μάλα λάμπρα"],
367
- ["ἀλλὰ τίς σ᾽, ὦ Ψάπφ᾽, ἀδικήει;"],
368
- ]
369
-
370
- gr.Examples(
371
- examples=examples,
372
- inputs=[text_input],
373
- outputs=[conllu_output, data_table, dependency_viz],
374
- fn=process_text,
375
- cache_examples=False
376
- )
377
 
378
- gr.Markdown("""
379
- ### About This Tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
- This parser is specifically designed for Ancient Greek texts in the Lesbian dialect (associated with Sappho and other poets from Lesbos).
382
- The model provides detailed morphosyntactic analysis including:
383
 
384
- **CoNLL-U Format Fields:**
385
  - **ID**: Token index
386
  - **FORM**: Word form or punctuation symbol
387
  - **LEMMA**: Lemma or stem of word form
388
  - **UPOS**: Universal part-of-speech tag
389
  - **XPOS**: Language-specific part-of-speech tag
390
- - **FEATS**: Morphological features (case, number, gender, tense, etc.)
391
  - **HEAD**: Head of the current word
392
  - **DEPREL**: Dependency relation to the head
393
  - **DEPS**: Enhanced dependency graph
394
  - **MISC**: Miscellaneous annotations
395
 
396
- **About the Model:**
397
- - Built with Stanford Stanza framework
398
- - Trained specifically on Lesbian Greek texts
399
- - Provides accurate morphological analysis for this dialect
400
- - Includes syntactic parsing with dependency relations
401
- - Components: tokenizer, lemmatizer, POS tagger, dependency parser
402
-
403
- **Credits:**
404
- - Model: [sbompolas/Lesbian-Greek-Morphosyntactic-Model](https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model)
405
- - Built with Stanford Stanza framework
406
- - Interface created with Gradio
407
  """)
408
 
409
  return app
 
3
  import pandas as pd
4
  import sys
5
  import traceback
 
6
  import os
7
+ import tempfile
8
+ import requests
9
+ from pathlib import Path
10
 
11
+ # Global variable to store the Lesbian Greek model
12
  LESBIAN_GREEK_MODEL = None
13
 
14
def download_model_file(url, filename, timeout=60):
    """Download a model file from Hugging Face and store it at ``filename``.

    The payload is streamed into a sibling ``<filename>.part`` file and only
    renamed to ``filename`` once the whole body has been written. An
    interrupted or failed download therefore never leaves a truncated file at
    the destination, which matters because callers decide whether to download
    by checking if ``filename`` already exists.

    Args:
        url: HTTP(S) URL of the file to fetch.
        filename: Destination path (string) for the downloaded file.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled connection cannot hang start-up forever.

    Returns:
        True if the file was fully downloaded and moved into place, False on
        any error (the error is printed, not raised).
    """
    partial_path = f"{filename}.part"
    try:
        print(f"Downloading {filename}...")
        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Publish the file only after the complete body is on disk.
        os.replace(partial_path, filename)
        print(f"Successfully downloaded {filename}")
        return True
    except Exception as e:
        print(f"Failed to download {filename}: {e}")
        # Best-effort cleanup of the incomplete temporary file.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return False
29
+
30
def setup_lesbian_greek_models():
    """Download the Lesbian Greek model files into ``./lesbian_greek_models``.

    Files that are already present and non-empty are reused. An empty file is
    treated as missing and fetched again, and whatever a failed download left
    behind is deleted, so a broken file is not mistaken for a valid model on
    the next start.

    Returns:
        ``(True, models_dir)`` with ``models_dir`` a ``Path`` when every file
        is available, or ``(False, error_message)`` as soon as one download
        fails.
    """
    models_dir = Path("./lesbian_greek_models")
    models_dir.mkdir(exist_ok=True)

    # Local file name -> URL in the Hugging Face repository
    model_files = {
        "tokenizer.pt": "https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/el_test_tokenizer.pt",
        "lemmatizer.pt": "https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/el_test_nocharlm_lemmatizer.pt",
        "pos.pt": "https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/el_test_transformer_tagger.pt",
        "depparse.pt": "https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/el_test_transformer_parser.pt"
    }

    for local_name, url in model_files.items():
        local_path = models_dir / local_name

        # Reuse only files that actually contain data.
        if local_path.is_file() and local_path.stat().st_size > 0:
            continue

        if not download_model_file(url, str(local_path)):
            # Drop any partial output so the next run downloads it again.
            try:
                local_path.unlink()
            except OSError:
                pass
            return False, f"Failed to download {local_name}"

    return True, models_dir
51
+
52
def initialize_lesbian_greek_model():
    """Initialize the Stanza pipeline for Lesbian Greek from the custom models.

    Steps:
      1. Make sure the model files are available via
         ``setup_lesbian_greek_models()``.
      2. Copy each one to ``./stanza_resources/el/<processor>/lesbian_greek.pt``.
      3. Build ``stanza.Pipeline`` with explicit ``*_model_path`` arguments and
         store it in the module-level ``LESBIAN_GREEK_MODEL``.

    Returns:
        ``(True, "Model loaded successfully")`` on success, otherwise
        ``(False, error_message)``. Exceptions are caught, printed with a
        traceback, and reported through the returned message.
    """
    global LESBIAN_GREEK_MODEL
    import shutil  # imported once here instead of on every loop iteration

    try:
        print("Setting up Lesbian Greek models...")

        # On failure the second element is the error message, not a path.
        success, models_dir = setup_lesbian_greek_models()
        if not success:
            return False, models_dir

        print("Creating Stanza pipeline with custom Lesbian Greek models...")

        # Directory layout: <resources>/<lang>/<processor>/<model>.pt
        stanza_dir = Path("./stanza_resources")
        stanza_dir.mkdir(parents=True, exist_ok=True)

        # NOTE(review): stanza may read STANZA_RESOURCES_DIR only at import
        # time, in which case setting it here has no effect -- confirm.
        os.environ['STANZA_RESOURCES_DIR'] = str(stanza_dir)

        lang_dir = stanza_dir / "el"
        lang_dir.mkdir(parents=True, exist_ok=True)

        # Stanza processor name -> downloaded model file name
        processors = {
            "tokenize": "tokenizer.pt",
            "lemma": "lemmatizer.pt",
            "pos": "pos.pt",
            "depparse": "depparse.pt"
        }

        for processor, model_file in processors.items():
            proc_dir = lang_dir / processor
            proc_dir.mkdir(parents=True, exist_ok=True)

            src_path = models_dir / model_file
            dst_path = proc_dir / "lesbian_greek.pt"

            # Fail here with a clear message rather than letting the pipeline
            # fail later on a path that was never populated.
            if not src_path.exists():
                raise FileNotFoundError(f"Model file not found: {src_path}")

            shutil.copy2(str(src_path), str(dst_path))
            print(f"Copied {model_file} to {dst_path}")

        # Pipeline configuration pointing every processor at its custom model
        config = {
            'processors': 'tokenize,pos,lemma,depparse',
            'lang': 'el',
            'tokenize_model_path': str(lang_dir / "tokenize" / "lesbian_greek.pt"),
            'pos_model_path': str(lang_dir / "pos" / "lesbian_greek.pt"),
            'lemma_model_path': str(lang_dir / "lemma" / "lesbian_greek.pt"),
            'depparse_model_path': str(lang_dir / "depparse" / "lesbian_greek.pt"),
            'use_gpu': False,
            'verbose': True
        }

        print("Initializing Lesbian Greek pipeline...")
        LESBIAN_GREEK_MODEL = stanza.Pipeline(**config)
        print("Lesbian Greek model loaded successfully!")
        return True, "Model loaded successfully"

    except Exception as e:
        error_msg = f"Failed to load Lesbian Greek model: {e}"
        print(error_msg)
        traceback.print_exc()
        return False, error_msg
119
 
120
  def stanza_doc_to_conllu(doc) -> str:
121
+ """Convert Stanza Document to CoNLL-U format manually"""
122
  conllu_lines = []
123
 
124
  for sent_idx, sentence in enumerate(doc.sentences):
 
147
 
148
  return "\n".join(conllu_lines)
149
 
150
+ def parse_text_with_lesbian_greek(text: str) -> str:
151
+ """Parse Lesbian Greek text using custom Stanza models and return CoNLL-U format"""
152
  global LESBIAN_GREEK_MODEL
153
 
154
  if LESBIAN_GREEK_MODEL is None:
 
158
  return "Error: Please enter some text to parse."
159
 
160
  try:
161
+ print(f"Processing Lesbian Greek text: {text[:50]}...")
162
  # Process the text
163
  doc = LESBIAN_GREEK_MODEL(text)
164
 
165
+ # Convert to CoNLL-U format manually
166
  conllu_output = stanza_doc_to_conllu(doc)
167
  print("CoNLL-U conversion successful!")
168
  return conllu_output
 
217
  try:
218
  viz_lines = []
219
  viz_lines.append("Dependency Parse Visualization:")
220
+ viz_lines.append("=" * 50)
221
 
222
  for _, row in df.iterrows():
223
  word = row['FORM']
224
  pos = row['UPOS']
 
225
  deprel = row['DEPREL']
226
  head_id = row['HEAD']
 
227
 
228
  # Find the head word
229
  if head_id != '0': # Not root
 
231
  head_idx = int(head_id) - 1
232
  if 0 <= head_idx < len(df):
233
  head_word = df.iloc[head_idx]['FORM']
234
+ viz_lines.append(f"{word} ({pos}) --{deprel}--> {head_word}")
235
  else:
236
+ viz_lines.append(f"{word} ({pos}) --{deprel}--> [OUT_OF_RANGE]")
237
  except (ValueError, IndexError):
238
+ viz_lines.append(f"{word} ({pos}) --{deprel}--> [ERROR]")
239
  else:
240
+ viz_lines.append(f"{word} ({pos}) --{deprel}--> ROOT")
 
 
 
 
 
241
 
242
  return "\n".join(viz_lines)
243
 
244
  except Exception as e:
245
  return f"Error creating visualization: {str(e)}"
246
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  def process_text(text: str):
248
  """Main processing function that returns all outputs"""
249
  if not text.strip():
250
  empty_df = pd.DataFrame()
251
  return "Please enter some Lesbian Greek text to parse.", empty_df, "No data to display"
252
 
253
+ # Parse with custom Lesbian Greek model
254
  print(f"Starting to process: {text[:30]}...")
255
+ conllu_output = parse_text_with_lesbian_greek(text)
256
 
257
  if conllu_output.startswith("Error"):
258
  empty_df = pd.DataFrame()
 
276
  empty_df = pd.DataFrame()
277
  return conllu_output, empty_df, error_msg
278
 
279
+ # Initialize Lesbian Greek model
280
  print("Initializing Lesbian Greek Stanza model...")
281
+ model_loaded, status_message = initialize_lesbian_greek_model()
282
 
283
  if not model_loaded:
284
+ print(f"CRITICAL ERROR: {status_message}")
285
+ # Don't exit, allow app to start and show error message
286
 
287
  # Create Gradio interface
288
  def create_gradio_app():
 
290
  gr.Markdown("""
291
  # Lesbian Greek Morphosyntactic Parser
292
 
293
+ This tool uses custom Stanza models trained specifically for the **Lesbian dialect of Greek**
294
+ (spoken on the island of Lesbos). The models provide:
295
+
296
+ - **Tokenization**: Splits text into tokens
297
+ - **POS Tagging**: Part-of-speech classification
298
+ - **Lemmatization**: Base form identification
299
+ - **Dependency Parsing**: Syntactic relationship analysis
300
+ - **CoNLL-U Output**: Standard linguistic annotation format
301
+
302
+ ## About the Models
303
 
304
+ These models were trained on a curated treebank of 540 sentences from both oral and written
305
+ sources collected from various villages of Lesbos, including Agra, Chidira, Eressos,
306
+ Pterounta, Mesotopos, and Parakoila.
307
+
308
+ **Citation**: Bompolas, S., Markantonatou, S., Ralli, A., & Anastasopoulos, A. (2025).
309
+ Crossing Dialectal Boundaries: Building a Treebank for the Dialect of Lesbos through
310
+ Knowledge Transfer from Standard Modern Greek.
311
 
312
  Enter your Lesbian Greek text below to get started!
313
  """)
314
 
315
+ if not model_loaded:
316
+ gr.Markdown(f"""
317
+ ⚠️ **Model Loading Error**: {status_message}
318
+
319
+ The Lesbian Greek models could not be loaded. This may be due to:
320
+ - Network issues downloading the models
321
+ - Insufficient memory or storage
322
+ - Model compatibility issues
323
+
324
+ Please try refreshing the page or contact the developers.
325
+ """)
326
 
327
  with gr.Row():
328
  with gr.Column():
329
  text_input = gr.Textbox(
330
  label="Lesbian Greek Text Input",
331
+ placeholder="Εισάγετε το κείμενο στη Λεσβιακή διάλεκτο...",
332
  lines=4,
333
+ value="Τα παιδιά πάντ στο κήπ." if model_loaded else ""
334
  )
335
 
336
+ parse_button = gr.Button(
337
+ "Parse Lesbian Greek Text",
338
+ variant="primary",
339
+ size="lg",
340
+ interactive=model_loaded
341
+ )
342
 
343
  with gr.Row():
344
  with gr.Column():
 
348
  lines=10,
349
  max_lines=20,
350
  show_copy_button=True,
351
+ info="Raw CoNLL-U format output optimized for Lesbian Greek dialect"
352
  )
353
 
354
  with gr.Row():
 
362
 
363
  with gr.Row():
364
  with gr.Column():
365
+ gr.Markdown("### Dependency Structure")
366
  dependency_viz = gr.Textbox(
367
  label="Dependency Relationships",
368
+ lines=8,
369
+ max_lines=15,
370
  show_copy_button=True,
371
+ info="Text-based visualization of syntactic dependencies"
372
  )
373
 
374
  # Event handling
375
+ if model_loaded:
376
+ parse_button.click(
377
+ fn=process_text,
378
+ inputs=[text_input],
379
+ outputs=[conllu_output, data_table, dependency_viz]
380
+ )
381
+
382
+ # Also trigger on Enter in text input
383
+ text_input.submit(
384
+ fn=process_text,
385
+ inputs=[text_input],
386
+ outputs=[conllu_output, data_table, dependency_viz]
387
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
+ # Add Lesbian Greek examples (if available)
390
+ if model_loaded:
391
+ gr.Markdown("### Example Lesbian Greek Texts")
392
+ examples = [
393
+ ["Τα παιδιά πάντ στο κήπ."],
394
+ ["Η γάτα κάθεται στο τραπέζ."],
395
+ ["Ο ήλιος λάμπει στον ουρανό."],
396
+ ["Η θάλασσα είναι γαλάζια και όμορφη."],
397
+ ]
398
+
399
+ gr.Examples(
400
+ examples=examples,
401
+ inputs=[text_input],
402
+ outputs=[conllu_output, data_table, dependency_viz],
403
+ fn=process_text,
404
+ cache_examples=False
405
+ )
406
 
407
+ gr.Markdown("""
408
+ ### About CoNLL-U Format
409
 
410
+ The CoNLL-U format includes these fields for each token:
411
  - **ID**: Token index
412
  - **FORM**: Word form or punctuation symbol
413
  - **LEMMA**: Lemma or stem of word form
414
  - **UPOS**: Universal part-of-speech tag
415
  - **XPOS**: Language-specific part-of-speech tag
416
+ - **FEATS**: Morphological features
417
  - **HEAD**: Head of the current word
418
  - **DEPREL**: Dependency relation to the head
419
  - **DEPS**: Enhanced dependency graph
420
  - **MISC**: Miscellaneous annotations
421
 
422
+ ### Resources
423
+ - [Lesbian Greek Models on Hugging Face](https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model)
424
+ - [UD_Greek-Lesbian Treebank](https://github.com/UniversalDependencies/UD_Greek-Lesbian)
425
+ - [Stanza Documentation](https://stanfordnlp.github.io/stanza/)
 
 
 
 
 
 
 
426
  """)
427
 
428
  return app