sbompolas committed on
Commit
45e2824
·
verified ·
1 Parent(s): 9d998dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -117
app.py CHANGED
@@ -1,20 +1,24 @@
1
  import gradio as gr
 
2
  import stanza
3
  import pandas as pd
4
  import requests
5
  import traceback
6
- import os
7
- import json
8
  from pathlib import Path
9
 
10
- # ─── 1. MODEL INITIALIZATION (your original code) ────────────────────────────
11
- LESBIAN_GREEK_MODEL = None
 
 
 
 
 
12
 
13
  def download_model_file(url, filename):
14
  try:
15
  resp = requests.get(url, stream=True)
16
  resp.raise_for_status()
17
- with open(filename, 'wb') as f:
18
  for chunk in resp.iter_content(8192):
19
  f.write(chunk)
20
  return True
@@ -22,46 +26,48 @@ def download_model_file(url, filename):
22
  print(f"Download failed {filename}: {e}")
23
  return False
24
 
25
- def setup_lesbian_greek_models():
26
- models_dir = Path("./lesbian_greek_models")
27
- models_dir.mkdir(exist_ok=True)
28
- urls = {
29
- "tokenizer.pt": "https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/tokenizer.pt",
30
- "lemmatizer.pt":"https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/lemmatizer.pt",
31
- "pos.pt": "https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/pos.pt",
32
- "depparse.pt": "https://huggingface.co/sbompolas/Lesbian-Greek-Morphosyntactic-Model/resolve/main/depparse.pt"
33
- }
34
- for fn, url in urls.items():
35
- tgt = models_dir/fn
36
- if not tgt.exists() and not download_model_file(url, str(tgt)):
37
- return False, f"Failed to download {fn}"
38
- return True, models_dir
39
-
40
- def initialize_lesbian_greek_model():
41
- global LESBIAN_GREEK_MODEL
42
  try:
43
- success, models_dir = setup_lesbian_greek_models()
44
- if not success:
45
- return False, models_dir
46
- cfg = {
47
- 'processors': 'tokenize,pos,lemma,depparse',
48
- 'lang': 'el', 'use_gpu': False, 'verbose': False,
49
- 'tokenize_model_path': str(models_dir/"tokenizer.pt"),
50
- 'pos_model_path': str(models_dir/"pos.pt"),
51
- 'lemma_model_path': str(models_dir/"lemmatizer.pt"),
52
- 'depparse_model_path': str(models_dir/"depparse.pt")
53
- }
54
- LESBIAN_GREEK_MODEL = stanza.Pipeline(**cfg)
55
- return True, "βœ… Custom Lesbian Greek models loaded"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  except Exception as e:
57
  traceback.print_exc()
58
  return False, str(e)
59
 
60
- # initialize once
61
- model_loaded, status_message = initialize_lesbian_greek_model()
62
 
 
63
 
64
- # ─── 2. CONLL-U & DATAFRAME & TEXT‐VIZ ────────────────────────────────────────
65
  def stanza_doc_to_conllu(doc) -> str:
66
  lines = []
67
  for sid, sent in enumerate(doc.sentences, 1):
@@ -69,15 +75,11 @@ def stanza_doc_to_conllu(doc) -> str:
69
  lines.append(f"# text = {sent.text}")
70
  for w in sent.words:
71
  fields = [
72
- str(w.id),
73
- w.text,
74
- w.lemma or "_",
75
- w.upos or "_",
76
- w.xpos or "_",
77
- w.feats or "_",
78
  str(w.head) if w.head is not None else "0",
79
- w.deprel or "_",
80
- "_","_"
81
  ]
82
  lines.append("\t".join(fields))
83
  lines.append("")
@@ -88,138 +90,168 @@ def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
88
  for L in conllu.splitlines():
89
  if not L or L.startswith("#"):
90
  continue
91
- p = L.split("\t")
92
- if len(p)>=10:
93
  rows.append({
94
- 'ID': p[0],'FORM':p[1],'LEMMA':p[2],
95
- 'UPOS':p[3],'XPOS':p[4],'FEATS':p[5],
96
- 'HEAD':p[6],'DEPREL':p[7],'DEPS':p[8],'MISC':p[9]
97
  })
98
  return pd.DataFrame(rows)
99
 
100
  def create_dependency_visualization(df: pd.DataFrame) -> str:
101
  if df.empty:
102
  return "No data to visualize"
103
- viz = ["Dependency Parse Visualization:","-"*40]
104
  for _, r in df.iterrows():
105
- w,p,d,h = r['FORM'], r['UPOS'], r['DEPREL'], r['HEAD']
106
- if h!='0':
107
- try: hw = df.iloc[int(h)-1]['FORM']
108
- except: hw="[ERR]"
 
 
109
  viz.append(f"{w} ({p}) --{d}--> {hw}")
110
  else:
111
  viz.append(f"{w} ({p}) --{d}--> ROOT")
112
  return "\n".join(viz)
113
 
114
-
115
- # ─── 3. YOUR FULL SVG‐BUILDER (unchanged) ────────────────────────────────────
116
  def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1):
117
- """Paste your entire create_single_sentence_svg body here exactly as before."""
118
- # … your 200+ lines of SVG code …
119
- # For brevity, here’s a minimal stub. Replace with your full code.
120
- df = sentence_data if isinstance(sentence_data, pd.DataFrame) else pd.DataFrame(sentence_data)
121
- words = [r['FORM'] for r in sentence_data]
122
- return f"<svg width='200' height='50'><text x='10' y='20'>{' '.join(words)}</text></svg>"
123
-
124
- def create_multi_sentence_svg(sentences):
125
- """You can leave this unused or paste your multi‐sentence code."""
126
- # optional: not needed now
127
- return create_single_sentence_svg(sentences[0])
128
-
129
-
130
- # ─── 4. PROCESS_TEXT returns SVG + dropdown + state + others ────────────────
131
- def process_text(text: str):
132
- if not model_loaded:
133
  return (
134
- "<p>Model not loaded</p>",
135
- gr.Dropdown.update(choices=[], value=None),
136
- [], # state
137
- "", pd.DataFrame(), ""
138
  )
139
- if not text.strip():
 
 
 
 
 
 
 
 
 
 
 
140
  return (
141
- "<p>Enter some text</p>",
142
- gr.Dropdown.update(choices=[], value=None),
143
- [],
144
- "", pd.DataFrame(), ""
145
  )
146
 
147
- doc = LESBIAN_GREEK_MODEL(text)
148
  conllu = stanza_doc_to_conllu(doc)
149
- df = conllu_to_dataframe(conllu)
150
- text_v = create_dependency_visualization(df)
151
 
152
- # build per‐sentence payloads
153
  sentences = []
154
  for sent in doc.sentences:
155
  payload = []
156
  for w in sent.words:
157
  payload.append({
158
- 'ID': w.id,'FORM':w.text,'LEMMA':w.lemma or "_",
159
- 'UPOS':w.upos or "_",'XPOS':w.xpos or "_",
160
- 'FEATS':w.feats or "_",'HEAD':w.head or 0,
161
- 'DEPREL':w.deprel or "_"
162
  })
163
  sentences.append(payload)
164
 
165
  sent_ids = [str(i+1) for i in range(len(sentences))]
166
- initial_svg = create_single_sentence_svg(sentences[0]) if sentences else "<p>No data</p>"
 
 
 
 
167
 
168
  return (
169
- initial_svg,
170
- gr.Dropdown.update(choices=sent_ids, value=sent_ids[0] if sent_ids else None),
171
- sentences,
172
- conllu,
173
- df,
174
- text_v
175
  )
176
 
177
  def update_svg(selected_id, sentences):
 
178
  try:
179
- idx = int(selected_id) - 1
180
  return create_single_sentence_svg(sentences[idx])
181
  except:
182
- return "<p>Invalid sentence</p>"
183
 
 
184
 
185
- # ─── 5. BUILD THE GRADIO INTERFACE ───────────────────────────────────────────
186
  def create_app():
187
  with gr.Blocks(title="Lesbian Greek Parser") as app:
188
  gr.Markdown("# Lesbian Greek Morphosyntactic Parser")
189
 
190
- # Input area
 
 
 
 
191
  with gr.Row():
192
  with gr.Column():
193
- text_input = gr.Textbox(
194
- label="Lesbian Greek Text",
195
  lines=4,
196
  placeholder="ΕισάγΡτΡ κΡίμΡνο…"
197
  )
198
- parse_btn = gr.Button("Parse", variant="primary")
 
 
 
 
 
199
 
200
- # SVG + Selector
201
  with gr.Row():
202
  with gr.Column():
203
- svg_out = gr.HTML("<p>No visualization yet</p>")
204
- sentence_dd = gr.Dropdown(label="Choose sentence", choices=[])
205
- sentences_st = gr.State([])
 
 
 
 
 
 
206
 
207
- # Lower outputs
208
  with gr.Row():
209
  with gr.Column():
210
- conllu_out = gr.Textbox(label="CoNLL-U", lines=10, show_copy_button=True)
211
- table_out = gr.Dataframe(label="Token Table")
212
- text_viz_out= gr.Textbox(label="Text Dependencies", lines=8, show_copy_button=True)
 
 
 
 
 
 
 
 
213
 
214
  # Events
215
- parse_btn.click(
216
  fn=process_text,
217
- inputs=[text_input],
218
- outputs=[svg_out, sentence_dd, sentences_st, conllu_out, table_out, text_viz_out]
 
 
 
219
  )
220
  sentence_dd.change(
221
  fn=update_svg,
222
- inputs=[sentence_dd, sentences_st],
223
  outputs=svg_out
224
  )
225
 
 
1
  import gradio as gr
2
+ from gradio import update
3
  import stanza
4
  import pandas as pd
5
  import requests
6
  import traceback
 
 
7
  from pathlib import Path
8
 
9
+ # 1. MODEL VARIANTS & INITIALIZATION
10
+
11
+ LESBIAN_MODELS = {}
12
+ MODEL_VARIANTS = {
13
+ "Lesbian-only": "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
14
+ "Lesbian-synthetic-data": "sbompolas/NGUD-Lesbian-Morphosyntactic-Model"
15
+ }
16
 
17
  def download_model_file(url, filename):
18
  try:
19
  resp = requests.get(url, stream=True)
20
  resp.raise_for_status()
21
+ with open(filename, "wb") as f:
22
  for chunk in resp.iter_content(8192):
23
  f.write(chunk)
24
  return True
 
26
  print(f"Download failed {filename}: {e}")
27
  return False
28
 
def initialize_models():
    """Fetch the model files of every variant and build its Stanza pipeline.

    For each entry of ``MODEL_VARIANTS`` the four ``.pt`` files are stored
    under ``./models/<variant name>/`` (files already on disk are not
    downloaded again) and the resulting pipeline is registered in
    ``LESBIAN_MODELS`` under the variant name.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is ``False`` as soon as a single
        download or pipeline construction fails; variants handled before
        the failure stay registered in ``LESBIAN_MODELS``.
    """
    file_names = ("tokenizer.pt", "lemmatizer.pt", "pos.pt", "depparse.pt")
    try:
        root_dir = Path("./models")
        root_dir.mkdir(exist_ok=True)

        for variant_name, repo_id in MODEL_VARIANTS.items():
            variant_dir = root_dir / variant_name
            variant_dir.mkdir(exist_ok=True)

            # Download only what is missing locally.
            for file_name in file_names:
                target = variant_dir / file_name
                if target.exists():
                    continue
                source_url = f"https://huggingface.co/{repo_id}/resolve/main/{file_name}"
                if not download_model_file(source_url, str(target)):
                    return False, f"Failed to download {file_name} for {variant_name}"

            try:
                LESBIAN_MODELS[variant_name] = stanza.Pipeline(
                    processors='tokenize,pos,lemma,depparse',
                    lang='el',
                    use_gpu=False,
                    verbose=False,
                    tokenize_model_path=str(variant_dir / "tokenizer.pt"),
                    pos_model_path=str(variant_dir / "pos.pt"),
                    lemma_model_path=str(variant_dir / "lemmatizer.pt"),
                    depparse_model_path=str(variant_dir / "depparse.pt"),
                )
                print(f"Loaded variant {variant_name}")
            except Exception as exc:
                return False, f"Pipeline init error {variant_name}: {exc}"

        return True, "Models loaded"
    except Exception as exc:
        # Anything unexpected (filesystem errors, ...) is logged and reported.
        traceback.print_exc()
        return False, str(exc)
66
 
67
+ loaded, load_status = initialize_models()
 
68
 
69
+ # 2. UTILS
70
 
 
71
  def stanza_doc_to_conllu(doc) -> str:
72
  lines = []
73
  for sid, sent in enumerate(doc.sentences, 1):
 
75
  lines.append(f"# text = {sent.text}")
76
  for w in sent.words:
77
  fields = [
78
+ str(w.id), w.text,
79
+ w.lemma or "_", w.upos or "_",
80
+ w.xpos or "_", w.feats or "_",
 
 
 
81
  str(w.head) if w.head is not None else "0",
82
+ w.deprel or "_", "_", "_"
 
83
  ]
84
  lines.append("\t".join(fields))
85
  lines.append("")
 
90
  for L in conllu.splitlines():
91
  if not L or L.startswith("#"):
92
  continue
93
+ parts = L.split("\t")
94
+ if len(parts) >= 10:
95
  rows.append({
96
+ 'ID': parts[0], 'FORM': parts[1], 'LEMMA': parts[2],
97
+ 'UPOS': parts[3], 'XPOS': parts[4], 'FEATS': parts[5],
98
+ 'HEAD': parts[6], 'DEPREL': parts[7], 'DEPS': parts[8], 'MISC': parts[9]
99
  })
100
  return pd.DataFrame(rows)
101
 
def create_dependency_visualization(df: pd.DataFrame) -> str:
    """Render the token table as one ``word (UPOS) --deprel--> head`` line per token.

    HEAD values are sentence-relative, while ``df`` may hold the tokens of
    several sentences stacked on top of each other. Heads are therefore
    resolved against the sentence the token belongs to, not against the
    absolute row position in the table.

    Args:
        df: Table with at least the columns ``ID``, ``FORM``, ``UPOS``,
            ``HEAD`` and ``DEPREL``. A row whose ``ID`` is ``1`` starts a
            new sentence.

    Returns:
        The multi-line text visualization, or ``"No data to visualize"``
        when ``df`` is empty. A head that cannot be found inside the
        token's own sentence is shown as ``[ERR]``.
    """
    if df.empty:
        return "No data to visualize"

    forms = list(df['FORM'])

    # Build one ID -> FORM lookup per sentence and remember, for every row,
    # which lookup it belongs to.
    row_lookups = []
    current = None
    for tok_id, form in zip(df['ID'], forms):
        tok_id = str(tok_id)
        if current is None or tok_id == '1':
            current = {}
        current[tok_id] = form
        row_lookups.append(current)

    viz = ["Dependency Parse Visualization:", "-" * 40]
    rows = zip(forms, df['UPOS'], df['DEPREL'], df['HEAD'], row_lookups)
    for w, p, d, h, forms_by_id in rows:
        head = str(h)
        if head == '0':
            viz.append(f"{w} ({p}) --{d}--> ROOT")
        else:
            hw = forms_by_id.get(head, "[ERR]")
            viz.append(f"{w} ({p}) --{d}--> {hw}")
    return "\n".join(viz)
117
 
 
 
def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1):
    """Return the SVG markup for one parsed sentence.

    NOTE(review): in this revision the detailed SVG builder is elided and
    only a placeholder is returned; none of the arguments is read. Restore
    the full implementation before relying on the output.
    """
    placeholder_svg = "<svg><!-- your SVG --></svg>"
    return placeholder_svg
122
+
123
+ # 3. PROCESS & DROPDOWN-UPDATES
124
+
def process_text(text, variant):
    """Parse ``text`` with the selected model variant.

    Args:
        text: Raw input text; ``None``, empty and whitespace-only input
            are all treated as "nothing to parse".
        variant: Key into ``LESBIAN_MODELS`` selecting the pipeline.

    Returns:
        A 6-tuple in the order expected by the UI outputs:
        ``(conllu, dataframe, text_viz, dropdown_update, sentences,
        initial_svg)``. On failure the first element carries the error
        message and the remaining elements are empty placeholders.
    """
    # Guard against None as well as blank input before calling .strip().
    if not text or not text.strip():
        return _empty_parse_result("", "<p>No data</p>")

    pipe = LESBIAN_MODELS.get(variant)
    if pipe is None:
        return _empty_parse_result(f"Error: {variant} not loaded")

    try:
        doc = pipe(text)
    except Exception as e:
        return _empty_parse_result(f"Parse error: {e}")

    conllu = stanza_doc_to_conllu(doc)
    df = conllu_to_dataframe(conllu)
    text_viz = create_dependency_visualization(df)

    # One list of plain token dicts per sentence; this is what the
    # sentence selector later hands to the SVG renderer.
    sentences = [
        [
            {
                'ID': w.id, 'FORM': w.text, 'LEMMA': w.lemma or "_",
                'UPOS': w.upos or "_", 'XPOS': w.xpos or "_",
                'FEATS': w.feats or "_", 'HEAD': w.head or 0,
                'DEPREL': w.deprel or "_",
            }
            for w in sent.words
        ]
        for sent in doc.sentences
    ]

    sent_ids = [str(i) for i in range(1, len(sentences) + 1)]
    dropdown_upd = update(choices=sent_ids, value=sent_ids[0] if sent_ids else None)
    initial_svg = (
        create_single_sentence_svg(sentences[0])
        if sentences else "<p>No data</p>"
    )

    return (
        conllu, df, text_viz,
        dropdown_upd, sentences, initial_svg,
    )


def _empty_parse_result(message="", svg_html="<p>Error</p>"):
    """Build the 6-tuple returned by process_text when nothing can be shown."""
    return (
        message, pd.DataFrame(), "",
        update(choices=[], value=None),
        [], svg_html,
    )
183
 
def update_svg(selected_id, sentences):
    """Render the SVG for the sentence chosen in the dropdown.

    Args:
        selected_id: 1-based sentence number as delivered by the dropdown
            (a string such as ``"2"``); may be ``None`` when nothing is
            selected.
        sentences: Per-sentence token payloads produced by the parse step.

    Returns:
        The SVG markup for the selected sentence, or
        ``"<p>Invalid selection</p>"`` when the selection cannot be mapped
        to a sentence or rendering fails.
    """
    invalid = "<p>Invalid selection</p>"
    try:
        idx = int(selected_id) - 1
    except (TypeError, ValueError):
        return invalid

    # Reject 0 and negative selections explicitly: Python's negative
    # indexing would otherwise silently render a sentence counted from
    # the end of the list.
    if not sentences or not 0 <= idx < len(sentences):
        return invalid

    try:
        return create_single_sentence_svg(sentences[idx])
    except Exception:
        # Keep the UI responsive on renderer errors, but leave a trace.
        traceback.print_exc()
        return invalid
191
 
192
+ # 4. BUILD GRADIO UI
193
 
 
194
  def create_app():
195
  with gr.Blocks(title="Lesbian Greek Parser") as app:
196
  gr.Markdown("# Lesbian Greek Morphosyntactic Parser")
197
 
198
+ if loaded:
199
+ gr.Markdown(f"βœ… Models: {', '.join(MODEL_VARIANTS.keys())}")
200
+ else:
201
+ gr.Markdown(f"❌ Load error: {load_status}")
202
+
203
  with gr.Row():
204
  with gr.Column():
205
+ txt = gr.Textbox(
206
+ label="Input Text",
207
  lines=4,
208
  placeholder="ΕισάγΡτΡ κΡίμΡνο…"
209
  )
210
+ mdl = gr.Radio(
211
+ choices=list(MODEL_VARIANTS.keys()),
212
+ value="Lesbian-only",
213
+ label="Model Variant"
214
+ )
215
+ btn = gr.Button("Parse", variant="primary")
216
 
 
217
  with gr.Row():
218
  with gr.Column():
219
+ # 1. SVG output
220
+ svg_out = gr.HTML("<p>No visualization</p>")
221
+ # 2. Sentence selector
222
+ sentence_dd = gr.Dropdown(
223
+ label="Choose sentence",
224
+ choices=[],
225
+ interactive=True
226
+ )
227
+ sentences_state = gr.State([])
228
 
 
229
  with gr.Row():
230
  with gr.Column():
231
+ conllu_out = gr.Textbox(
232
+ label="CoNLL-U",
233
+ lines=10,
234
+ show_copy_button=True
235
+ )
236
+ table_out = gr.Dataframe(label="Token Table")
237
+ text_out = gr.Textbox(
238
+ label="Text-based Dependencies",
239
+ lines=8,
240
+ show_copy_button=True
241
+ )
242
 
243
  # Events
244
+ btn.click(
245
  fn=process_text,
246
+ inputs=[txt, mdl],
247
+ outputs=[
248
+ conllu_out, table_out, text_out,
249
+ sentence_dd, sentences_state, svg_out
250
+ ]
251
  )
252
  sentence_dd.change(
253
  fn=update_svg,
254
+ inputs=[sentence_dd, sentences_state],
255
  outputs=svg_out
256
  )
257