Spaces:

sbompolas
/

Lesbian-morphosyntactic-parsing

Sleeping

App Files Files Community

sbompolas committed on Jul 1, 2025

Commit

383a058

verified ·

1 Parent(s): a97b4db

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -165

app.py CHANGED Viewed

@@ -6,8 +6,6 @@ import requests
 import traceback
 from pathlib import Path
-# 1. MODEL VARIANTS & INITIALIZATION
 LESBIAN_MODELS = {}
 MODEL_VARIANTS = {
     "Lesbian-only":           "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
@@ -61,8 +59,6 @@ def initialize_models():
 loaded, load_status = initialize_models()
-# 2. CONLL-U / DATAFRAME / TEXT‐VIZ
 def stanza_doc_to_conllu(doc) -> str:
     lines = []
     for sid, sent in enumerate(doc.sentences, 1):
@@ -83,7 +79,14 @@ def stanza_doc_to_conllu(doc) -> str:
 def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
     rows = []
     for L in conllu.splitlines():
-        if not L or L.startswith("#"):
             continue
         parts = L.split("\t")
         if len(parts) >= 10:
@@ -97,9 +100,16 @@ def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
 def create_dependency_visualization(df: pd.DataFrame) -> str:
     if df.empty:
         return "No data to visualize"
-    viz = ["Dependency Parse Visualization:", "-"*40]
-    for _, r in df.iterrows():
-        w, p, d, h = r['FORM'], r['UPOS'], r['DEPREL'], r['HEAD']
         if h != '0':
             try:
                 hw = df.iloc[int(h)-1]['FORM']
@@ -110,159 +120,8 @@ def create_dependency_visualization(df: pd.DataFrame) -> str:
             viz.append(f"{w} ({p}) --{d}--> ROOT")
     return "\n".join(viz)
-# 3. FULL SVG BUILDER (your original function)
-def create_single_sentence_svg(sentence_data, sentence_num=1, total_sentences=1):
-    try:
-        if isinstance(sentence_data, list):
-            df = pd.DataFrame(sentence_data)
-        else:
-            df = sentence_data
-        word_count = len(df)
-        base_word_width = 100
-        min_spacing = 30
-        word_spacing = max(
-            base_word_width,
-            (word_count * base_word_width + min_spacing * (word_count - 1)) / word_count
-        )
-        width = max(800, word_count * word_spacing + 100)
-        height = 500
-        word_y = height - 120
-        pos_y = word_y + 20
-        features_start_y = pos_y + 15
-        deprel_colors = {
-            'root': '#000000', 'nsubj': '#2980b9', 'obj': '#27ae60', 'det': '#e67e22',
-            'amod': '#8e44ad', 'nmod': '#16a085', 'case': '#34495e', 'punct': '#7f8c8d',
-            'cc': '#d35400', 'conj': '#2c3e50', 'cop': '#e74c3c', 'mark': '#9b59b6',
-            'csubj': '#3498db', 'xcomp': '#1abc9c', 'ccomp': '#f39c12', 'advcl': '#e91e63',
-            'advmod': '#9c27b0', 'obl': '#795548', 'iobj': '#607d8b', 'fixed': '#ff5722',
-            'aux': '#ff9800', 'acl': '#4caf50', 'appos': '#673ab7', 'compound': '#009688'
-        }
-        svg_parts = [
-            f'<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg" '
-            'style="background: white; border: 1px solid #eee;">',
-            '<defs>'
-        ]
-        for deprel, color in deprel_colors.items():
-            marker_id = f"arrow_{deprel}"
-            svg_parts.append(
-                f'<marker id="{marker_id}" markerWidth="4" markerHeight="4" '
-                'markerUnits="userSpaceOnUse" orient="auto" refX="3.5" refY="2">'
-                f'<path d="M0,0 L4,2 L0,4 Z" fill="{color}"/>'
-                f'</marker>'
-            )
-        svg_parts.append('</defs>')
-        svg_parts.append('<g>')
-        word_positions = {}
-        for idx, row in df.iterrows():
-            wid = int(row['ID'])
-            word_positions[wid] = 50 + (wid - 1) * word_spacing
-        used_spans = []
-        for idx, row in df.iterrows():
-            wid = int(row['ID'])
-            hid = int(row['HEAD']) if row['HEAD'] != '0' else 0
-            rel = row['DEPREL']
-            if hid == 0:
-                x = word_positions[wid]
-                col = deprel_colors.get(rel, '#000')
-                svg_parts.append(
-                    f'<line x1="{x}" y1="{word_y-15}" x2="{x}" y2="50" '
-                    f'stroke="{col}" stroke-width="1.5"/>'
-                )
-                mid = (word_y-15 + 50) / 2
-                svg_parts.append(
-                    f'<rect x="{x-15}" y="{mid-8}" width="30" height="14" '
-                    f'fill="white" stroke="{col}" rx="2"/>'
-                )
-                svg_parts.append(
-                    f'<text x="{x}" y="{mid+2}" text-anchor="middle" '
-                    f'fill="{col}" font-family="Arial" font-size="8" font-weight="bold">ROOT</text>'
-                )
-            else:
-                if hid in word_positions:
-                    x1, x2 = word_positions[wid], word_positions[hid]
-                    span = (min(wid, hid), max(wid, hid))
-                    lvl = 0
-                    conflict = True
-                    while conflict:
-                        conflict = False
-                        for es, el in used_spans:
-                            if el == lvl and not (span[1] < es[0] or span[0] > es[1]):
-                                lvl += 1
-                                conflict = True
-                                break
-                    used_spans.append((span, lvl))
-                    dist = abs(x2 - x1)
-                    base_h = min(40 + dist * 0.15, 100)
-                    arc_h = base_h + lvl * 35
-                    col = deprel_colors.get(rel, '#000')
-                    midx = (x1 + x2) / 2
-                    cty = word_y - arc_h
-                    path = f'M {x1} {word_y-15} Q {midx} {cty} {x2} {word_y-15}'
-                    svg_parts.append(
-                        f'<path d="{path}" stroke="{col}" stroke-width="1.5" '
-                        f'fill="none" marker-end="url(#arrow_{rel})"/>'
-                    )
-                    amx = 0.25*x1 + 0.5*midx + 0.25*x2
-                    amy = 0.25*(word_y-15) + 0.5*cty + 0.25*(word_y-15)
-                    lw = len(rel)*6 + 8
-                    svg_parts.append(
-                        f'<rect x="{amx-lw/2}" y="{amy-8}" width="{lw}" height="14" '
-                        f'fill="white" stroke="{col}" rx="2"/>'
-                    )
-                    svg_parts.append(
-                        f'<text x="{amx}" y="{amy+2}" text-anchor="middle" '
-                        f'fill="{col}" font-family="Arial" font-size="8" font-weight="bold">{rel}</text>'
-                    )
-        for idx, row in df.iterrows():
-            wid = int(row['ID'])
-            x = word_positions[wid]
-            word = row['FORM']
-            pos  = row['UPOS']
-            lemma= row['LEMMA']
-            feats= row['FEATS']
-            xpos = row['XPOS']
-            svg_parts.append(
-                f'<text x="{x}" y="{word_y}" text-anchor="middle" '
-                f'font-family="Arial" font-size="13" font-weight="bold">{word}</text>'
-            )
-            ann = []
-            if pos and pos!='_':  ann.append(f"upos={pos}")
-            if lemma and lemma not in ('_', word): ann.append(f"lemma={lemma}")
-            if xpos and xpos!='_':ann.append(f"xpos={xpos}")
-            if feats and feats not in ('', '_'):
-                for fpair in feats.split('|'):
-                    if '=' in fpair: ann.append(fpair)
-            for i,a in enumerate(ann):
-                y0 = features_start_y + i*12
-                svg_parts.append(
-                    f'<text x="{x}" y="{y0}" text-anchor="middle" '
-                    f'font-family="Arial" font-size="7" fill="#666">{a}</text>'
-                )
-        svg_parts.append('</g></svg>')
-        return "".join(svg_parts)
-    except Exception as e:
-        return f"<p>Error creating SVG: {e}</p>"
-# 4. PROCESS & DROPDOWN-UPDATES
 def process_text(text, variant):
     if not text.strip():
@@ -287,12 +146,12 @@ def process_text(text, variant):
     sentences = []
     for sent in doc.sentences:
-        payload = [{
             'ID': w.id, 'FORM': w.text, 'LEMMA': w.lemma or "_",
             'UPOS': w.upos or "_", 'XPOS': w.xpos or "_",
             'FEATS': w.feats or "_", 'HEAD': w.head or 0,
             'DEPREL': w.deprel or "_"
-        } for w in sent.words]
         sentences.append(payload)
     sent_ids = [str(i+1) for i in range(len(sentences))]
@@ -315,8 +174,6 @@ def update_svg(selected_id, sentences):
     except:
         return "<p>Invalid selection</p>"
-# 5. BUILD GRADIO UI
 def create_app():
     with gr.Blocks(title="Lesbian Greek Parser") as app:
         gr.Markdown("# Lesbian Greek Morphosyntactic Parser")

 import traceback
 from pathlib import Path
 LESBIAN_MODELS = {}
 MODEL_VARIANTS = {
     "Lesbian-only":           "sbompolas/Lesbian-Greek-Morphosyntactic-Model",
 loaded, load_status = initialize_models()
 def stanza_doc_to_conllu(doc) -> str:
     lines = []
     for sid, sent in enumerate(doc.sentences, 1):
 def conllu_to_dataframe(conllu: str) -> pd.DataFrame:
     rows = []
     for L in conllu.splitlines():
+        if not L:
+            if rows and rows[-1] != {}:
+                rows.append({})
+            continue
+        if L.startswith("#"):
+            if "=" in L:
+                key, val = L[2:].split("=", 1)
+                rows.append({'ID': f"# {key.strip()} =", 'FORM': val.strip()})
             continue
         parts = L.split("\t")
         if len(parts) >= 10:
 def create_dependency_visualization(df: pd.DataFrame) -> str:
     if df.empty:
         return "No data to visualize"
+    viz = []
+    for i, row in df.iterrows():
+        if pd.isna(row["ID"]):
+            continue
+        if isinstance(row["ID"], str) and row["ID"].startswith("#"):
+            if viz:
+                viz.append("")
+            viz.append(f"{row['ID']} {row['FORM']}")
+            continue
+        w, p, d, h = row['FORM'], row['UPOS'], row['DEPREL'], row['HEAD']
         if h != '0':
             try:
                 hw = df.iloc[int(h)-1]['FORM']
             viz.append(f"{w} ({p}) --{d}--> ROOT")
     return "\n".join(viz)
+# Keep your create_single_sentence_svg as-is; it already renders the annotations.
+# Make sure its `ann = [...]` block includes upos, lemma, and all feats (it does in your version).
 def process_text(text, variant):
     if not text.strip():
     sentences = []
     for sent in doc.sentences:
+        payload = [ {
             'ID': w.id, 'FORM': w.text, 'LEMMA': w.lemma or "_",
             'UPOS': w.upos or "_", 'XPOS': w.xpos or "_",
             'FEATS': w.feats or "_", 'HEAD': w.head or 0,
             'DEPREL': w.deprel or "_"
+        } for w in sent.words ]
         sentences.append(payload)
     sent_ids = [str(i+1) for i in range(len(sentences))]
     except:
         return "<p>Invalid selection</p>"
 def create_app():
     with gr.Blocks(title="Lesbian Greek Parser") as app:
         gr.Markdown("# Lesbian Greek Morphosyntactic Parser")