Safoura Banihashemi
Add format C: letter-prefixed IDs (A1, B2...) JSON support
d0e0f16
Raw
History Blame Contribute Delete
22.6 kB
"""
Argumentation Graph Visualizer — Hugging Face Space
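Supports:
- Demosthenes XML (.xml)
- Structured JSON with inline <prem>/<conc> tags (.json, format A)
- Pipeline result JSON with bracketed, section-scoped segment IDs such as [1] (.json, format B)
- Pipeline result JSON with letter-prefixed, globally unique segment IDs such as A1 (.json, format C)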
"""
import re
import os
import json
import tempfile
import gradio as gr
from lxml import etree
# ── Parsers ───────────────────────────────────────────────────────────────────
def parse_xml(filepath):
    """Parse a Demosthenes-style XML file into nodes and SUP/ATT edges.

    Every ``<prem>`` / ``<conc>`` element that carries an ``ID`` attribute
    becomes a node. Its ``SUP`` and ``ATT`` attributes are ``|``-separated
    lists of target IDs; each non-blank target yields one edge.

    Args:
        filepath: Path of the XML file to read.

    Returns:
        ``(nodes, sup_edges, att_edges)`` where ``nodes`` maps node ID to
        ``"prem"`` or ``"conc"`` and both edge lists hold
        ``(source_id, target_id)`` tuples.

    Raises:
        Whatever ``lxml.etree.parse`` raises for an unreadable or malformed
        file; the error is not handled here.
    """
    # The file comes from a user upload: do not expand entities or fetch
    # anything over the network while parsing it.
    parser = etree.XMLParser(resolve_entities=False, no_network=True)
    root = etree.parse(filepath, parser).getroot()

    nodes, sup_edges, att_edges = {}, [], []
    # Restricting iter() to the two tags also skips comments, processing
    # instructions and unresolved entity nodes.
    for elem in root.iter("prem", "conc"):
        nid = elem.get("ID")
        if not nid:
            continue
        nodes[nid] = elem.tag
        for attr, bucket in (("SUP", sup_edges), ("ATT", att_edges)):
            for target in elem.get(attr, "").split("|"):
                # Strip *before* testing for emptiness so that "a| |b" does
                # not produce an edge to the empty ID.
                target = target.strip()
                if target:
                    bucket.append((nid, target))
    return nodes, sup_edges, att_edges
def parse_json_inline(filepath):
    """Format A: sections with 'text' fields containing inline <prem>/<conc> XML tags.

    The ``text`` of every section is concatenated and scanned for ``<prem …>``
    / ``<conc …>`` tags (self-closing or with a closing tag). A tag needs an
    ``ID="…"`` attribute to become a node; ``SUP="…"`` and ``ATT="…"`` hold
    ``|``-separated target IDs.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        ``(nodes, sup_edges, att_edges, document)`` where ``nodes`` maps node
        ID to ``"prem"``/``"conc"``, the edge lists hold
        ``(source_id, target_id)`` tuples and ``document`` is the value of the
        top-level ``"document"`` key (``""`` when absent).
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    nodes, sup_edges, att_edges = {}, [], []
    if not isinstance(data, dict):
        return nodes, sup_edges, att_edges, ""

    sections = data.get("sections") or {}
    section_iter = sections.values() if isinstance(sections, dict) else sections
    # A missing or null "text" contributes an empty string instead of raising.
    all_text = "\n".join(
        str(sec.get("text") or "")
        for sec in section_iter
        if isinstance(sec, dict)
    )

    tag_pat = re.compile(r'<(prem|conc)\s+([^>]*?)(?:/>|>.*?</(?:prem|conc)>)', re.DOTALL)
    # \b keeps ID= from matching inside longer attribute names such as PID=.
    id_pat = re.compile(r'\bID="([^"]+)"')
    edge_pats = (
        (re.compile(r'\bSUP="([^"]+)"'), sup_edges),
        (re.compile(r'\bATT="([^"]+)"'), att_edges),
    )

    for m in tag_pat.finditer(all_text):
        tag, attrs = m.group(1), m.group(2)
        id_m = id_pat.search(attrs)
        if not id_m:
            continue
        nid = id_m.group(1)
        nodes[nid] = tag
        for pat, bucket in edge_pats:
            found = pat.search(attrs)
            if not found:
                continue
            for target in found.group(1).split("|"):
                # Strip before the emptiness test so "a| |b" yields no edge
                # to the empty ID.
                target = target.strip()
                if target:
                    bucket.append((nid, target))
    return nodes, sup_edges, att_edges, data.get("document", "")
def _clean_id(raw_id):
"""Convert '[3]' -> '3', strip brackets."""
return raw_id.strip().strip("[]").strip()
def parse_json_result(filepath):
    """
    Format B: pipeline result JSON.
    Structure: { doc_name: { section_1: { segment1: {id, role, sup, att, text, ...} } } }
    IDs restart per section, so we prefix them: S1_3, S2_1, etc.

    Sections are numbered 1..n by the first integer found in their key;
    keys without any digit are numbered after the numbered ones, in file
    order. ``sup`` / ``att`` are comma-separated target IDs (a JSON list is
    accepted as well) and are resolved within the same section only; targets
    that do not name a segment of that section are dropped.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        ``(nodes, sup_edges, att_edges, doc_name)``. ``nodes`` maps each
        prefixed ID to a dict with keys ``type`` (``"prem"``/``"conc"``),
        ``text`` (first 120 characters), ``scheme`` and ``atype``. Edge lists
        hold ``(source_id, target_id)`` tuples of prefixed IDs. An empty or
        non-object document yields empty results rather than an exception.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    nodes, sup_edges, att_edges = {}, [], []
    if not isinstance(data, dict) or not data:
        return nodes, sup_edges, att_edges, ""
    doc_name = next(iter(data))
    doc = data[doc_name]
    if not isinstance(doc, dict):
        return nodes, sup_edges, att_edges, doc_name

    def section_number(key):
        # Keys without digits sort last instead of raising on a failed search.
        found = re.search(r"\d+", str(key))
        return (0, int(found.group())) if found else (1, 0)

    def segment_id(seg):
        raw = seg.get("id")
        return _clean_id("" if raw is None else str(raw))

    def targets(raw):
        if not raw:
            return []
        parts = raw if isinstance(raw, (list, tuple)) else str(raw).split(",")
        return [_clean_id(str(part)) for part in parts]

    # Section index map: section_1 -> 1, section_2 -> 2, ...
    sec_index = {
        sec_k: i for i, sec_k in enumerate(sorted(doc, key=section_number), 1)
    }
    # Only well-formed (dict) sections and segments take part, in file order.
    segments = {
        sec_k: [seg for seg in sec.values() if isinstance(seg, dict)]
        for sec_k, sec in doc.items()
        if isinstance(sec, dict)
    }

    # First pass: register all nodes with prefixed IDs
    # node_map[sec_k][raw_id] = prefixed_id
    node_map = {}
    for sec_k, segs in segments.items():
        nmap = node_map[sec_k] = {}
        for seg in segs:
            raw_id = segment_id(seg)
            if not raw_id:
                continue
            role = str(seg.get("role") or "PREM").strip().upper()
            pid = f"S{sec_index[sec_k]}_{raw_id}"
            nmap[raw_id] = pid
            # Store extra info for tooltip
            nodes[pid] = {
                "type": "conc" if role == "CONC" else "prem",
                "text": str(seg.get("text") or "")[:120],
                "scheme": seg.get("scheme", ""),
                "atype": seg.get("type", ""),
            }

    # Second pass: build edges using prefixed IDs
    for sec_k, segs in segments.items():
        nmap = node_map[sec_k]
        for seg in segs:
            src_pid = nmap.get(segment_id(seg))
            if src_pid is None:
                continue
            for key, bucket in (("sup", sup_edges), ("att", att_edges)):
                for target in targets(seg.get(key)):
                    if target in nmap:
                        bucket.append((src_pid, nmap[target]))
    return nodes, sup_edges, att_edges, doc_name
def parse_json_format_c(filepath):
    """
    Format C: pipeline JSON with globally unique letter-prefixed IDs (A1, B2, G13).
    Structure: { doc_name: { section_N: { segmentN: {id, role, type, scheme, sup, text} } } }
    sup uses '|' separator. role is lowercase. No brackets in IDs.

    Because IDs are unique across sections they are used as-is, and edge
    targets are not checked against the set of known nodes here.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        ``(nodes, sup_edges, att_edges, doc_name)``. ``nodes`` maps each ID to
        a dict with keys ``type`` (``"prem"``/``"conc"``), ``text`` (first 120
        characters), ``scheme`` and ``atype``. Edge lists hold
        ``(source_id, target_id)`` tuples. An empty or non-object document
        yields empty results rather than an exception.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    nodes, sup_edges, att_edges = {}, [], []
    if not isinstance(data, dict) or not data:
        return nodes, sup_edges, att_edges, ""
    doc_name = next(iter(data))
    doc = data[doc_name]
    if not isinstance(doc, dict):
        return nodes, sup_edges, att_edges, doc_name

    for sec in doc.values():
        if not isinstance(sec, dict):
            continue
        for seg in sec.values():
            if not isinstance(seg, dict):
                continue
            raw_id = seg.get("id")
            nid = "" if raw_id is None else str(raw_id).strip()
            if not nid:
                continue
            role = str(seg.get("role") or "prem").strip().lower()
            nodes[nid] = {
                "type": "conc" if role == "conc" else "prem",
                "text": str(seg.get("text") or "")[:120],
                "scheme": seg.get("scheme", ""),
                "atype": seg.get("type", ""),
            }
            for key, bucket in (("sup", sup_edges), ("att", att_edges)):
                raw = seg.get(key)
                if raw is None:
                    continue
                parts = raw if isinstance(raw, (list, tuple)) else str(raw).split("|")
                for part in parts:
                    # Strip before the emptiness test so "A2| |B1" yields no
                    # edge to the empty ID.
                    target = str(part).strip()
                    if target:
                        bucket.append((nid, target))
    return nodes, sup_edges, att_edges, doc_name
def detect_and_parse(filepath):
    """Auto-detect JSON format (A/B/C) and dispatch to correct parser.

    Detection looks at the shape ``{doc_name: {section: {segment: {...}}}}``
    and, when it matches, at the ``id`` of the first segment of the first
    non-empty section:

    * letters followed by digits (``A1``)           -> format C
    * bracketed or bare integer (``[1]``, ``1``)    -> format B
    * anything else, or a different shape           -> format A (inline tags)

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        ``(node_types, sup_edges, att_edges, doc_name, node_meta)`` where
        ``node_types`` maps node ID to ``"prem"``/``"conc"`` and ``node_meta``
        maps node ID to a dict with keys ``type``, ``text``, ``scheme`` and
        ``atype`` (empty strings for format A, which carries no such data).
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    raw_id = None
    if isinstance(data, dict) and len(data) == 1:
        inner = next(iter(data.values()))
        if isinstance(inner, dict):
            # Skip empty sections so that one empty leading section does not
            # make a pipeline file look like format A.
            first_sec = next(
                (sec for sec in inner.values() if isinstance(sec, dict) and sec),
                None,
            )
            if first_sec is not None:
                first_seg = next(iter(first_sec.values()))
                if isinstance(first_seg, dict) and first_seg.get("id") is not None:
                    raw_id = str(first_seg["id"]).strip()

    parser = None
    if raw_id is not None:
        if re.match(r"[A-Za-z]+\d+", raw_id):
            # Format C: id like 'A1', 'B2' — letter-prefixed, no brackets
            parser = parse_json_format_c
        elif re.match(r"\[\s*\d+\s*\]", raw_id) or re.fullmatch(r"\d+", raw_id):
            # Format B: id like '[1]' (or a bare integer) — section-scoped
            parser = parse_json_result

    if parser is not None:
        node_meta, sup_edges, att_edges, doc_name = parser(filepath)
        node_types = {nid: meta["type"] for nid, meta in node_meta.items()}
        return node_types, sup_edges, att_edges, doc_name, node_meta

    # Format A: inline XML tags in text fields
    node_types, sup_edges, att_edges, doc_name = parse_json_inline(filepath)
    node_meta = {
        nid: {"type": ntype, "text": "", "scheme": "", "atype": ""}
        for nid, ntype in node_types.items()
    }
    return node_types, sup_edges, att_edges, doc_name, node_meta
# ── HTML builder ──────────────────────────────────────────────────────────────
def build_graph_html(node_types, sup_edges, att_edges, doc_name="", node_meta=None):
    """Render the argumentation graph as a standalone HTML page (D3 force layout).

    Args:
        node_types: Mapping of node ID to ``"prem"`` or ``"conc"``.
        sup_edges: Iterable of ``(source_id, target_id)`` support edges.
        att_edges: Iterable of ``(source_id, target_id)`` attack edges.
        doc_name: Title shown in the page header; underscores are shown as
            spaces. Falls back to ``"argumentation graph"`` when empty.
        node_meta: Optional mapping of node ID to a dict with ``text``,
            ``scheme`` and ``atype`` used in the hover tooltip.

    Returns:
        The complete HTML document as a string.

    Edges whose source or target is not a key of ``node_types`` are left out:
    the page's D3 link force and adjacency table both require every endpoint
    to be a known node, and a single dangling edge would otherwise abort the
    script and leave the graph blank. All file-derived text is escaped, both
    in the header and in the tooltip markup.
    """
    from html import escape  # local import keeps this block self-contained

    if node_meta is None:
        node_meta = {}

    def script_json(obj):
        # "<" never has to appear literally inside JSON; writing it as \u003c
        # stops "</script>" or "<!--" in node text from ending the script.
        return json.dumps(obj).replace("<", "\\u003c")

    def known_edges(edges):
        return [
            {"source": s, "target": t}
            for s, t in edges
            if s in node_types and t in node_types
        ]

    node_rows = []
    for nid, ntype in node_types.items():
        meta = node_meta.get(nid) or {}
        node_rows.append({
            "id": nid,
            "type": ntype,
            "text": meta.get("text", ""),
            "scheme": meta.get("scheme", ""),
            "atype": meta.get("atype", ""),
        })
    nodes_js = script_json(node_rows)
    sup_js = script_json(known_edges(sup_edges))
    att_js = script_json(known_edges(att_edges))
    label = escape(str(doc_name).replace("_", " ")) if doc_name else "argumentation graph"
    return """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/7.9.0/d3.min.js"></script>
<style>
*, *::before, *::after { box-sizing:border-box; margin:0; padding:0; }
:root {
--bg:#0d0f14; --surface:#141720; --border:#2a2f42;
--text:#e2e8f0; --muted:#64748b;
--green:#22c55e; --red:#ef4444;
--blue-bg:#1e3a5f; --blue-br:#3b82f6;
--pur-bg:#4c1d95; --pur-br:#a855f7;
}
html, body { width:100%; height:100%; background:var(--bg); color:var(--text);
font-family:'IBM Plex Sans',system-ui,sans-serif; overflow:hidden; }
body::before {
content:''; position:fixed; inset:0; pointer-events:none;
background-image: linear-gradient(var(--border) 1px, transparent 1px),
linear-gradient(90deg, var(--border) 1px, transparent 1px);
background-size:40px 40px; opacity:.2;
}
header {
position:fixed; top:0; left:0; right:0; z-index:10; height:50px;
display:flex; align-items:center; gap:14px; padding:0 18px;
background:rgba(20,23,32,.92); border-bottom:1px solid var(--border);
backdrop-filter:blur(8px);
}
.title { font-family:'IBM Plex Mono',monospace; font-size:12px; font-weight:600;
color:#fff; white-space:nowrap; overflow:hidden; text-overflow:ellipsis; max-width:380px; }
.title span { color:var(--muted); font-weight:400; }
.legend { display:flex; gap:16px; margin-left:auto; align-items:center; flex-shrink:0; }
.li { display:flex; align-items:center; gap:6px; font-family:'IBM Plex Mono',monospace;
font-size:11px; color:var(--muted); }
.lline { width:26px; height:2px; position:relative; }
.lline::after { content:''; position:absolute; right:-2px; top:-3.5px;
border-left:7px solid; border-top:4.5px solid transparent; border-bottom:4.5px solid transparent; }
.sup { background:var(--green); } .sup::after { border-left-color:var(--green); }
.att { background:var(--red); } .att::after { border-left-color:var(--red); }
.ldot { width:10px; height:10px; border-radius:50%; }
.dp { background:var(--blue-bg); border:1.5px solid var(--blue-br); }
.dc { background:var(--pur-bg); border:1.5px solid var(--pur-br); }
.hint { font-family:'IBM Plex Mono',monospace; font-size:10px; color:var(--muted);
padding-left:12px; border-left:1px solid var(--border); }
#stats { font-family:'IBM Plex Mono',monospace; font-size:11px; color:var(--muted);
padding-left:12px; border-left:1px solid var(--border); }
#stats b { color:var(--text); }
svg { width:100%; display:block; height:calc(100vh - 50px); margin-top:50px; }
.link-sup { stroke:var(--green); stroke-width:1.8; stroke-opacity:.75; fill:none; }
.link-att { stroke:var(--red); stroke-width:2.2; stroke-opacity:.9;
stroke-dasharray:7 3; fill:none; }
.ng { cursor:grab; } .ng:active { cursor:grabbing; }
.cp { fill:var(--blue-bg); stroke:var(--blue-br); stroke-width:1.5; transition:fill .15s; }
.cc { fill:var(--pur-bg); stroke:var(--pur-br); stroke-width:1.5; transition:fill .15s; }
.ng:hover .cp { fill:var(--blue-br); }
.ng:hover .cc { fill:var(--pur-br); }
.nl { fill:#fff; font-family:'IBM Plex Mono',monospace; font-weight:600;
text-anchor:middle; dominant-baseline:central; pointer-events:none; }
#tt {
position:fixed; display:none; background:#1c2030;
border:1px solid var(--border); border-radius:7px; padding:10px 14px;
font-family:'IBM Plex Mono',monospace; font-size:11px; line-height:1.8;
pointer-events:none; z-index:999; max-width:300px; color:var(--text);
box-shadow:0 8px 32px rgba(0,0,0,.6);
}
</style>
</head>
<body>
<header>
<div class="title">""" + label + """ <span>/ argumentation graph</span></div>
<div class="legend">
<div class="li"><div class="lline sup"></div>SUP</div>
<div class="li"><div class="lline att"></div>ATT</div>
<div class="li"><div class="ldot dp"></div>prem</div>
<div class="li"><div class="ldot dc"></div>conc</div>
<div class="hint">drag · scroll · hover</div>
<div id="stats"></div>
</div>
</header>
<svg id="g"></svg>
<div id="tt"></div>
<script>
const NODES = """ + nodes_js + """;
const SUP = """ + sup_js + """;
const ATT = """ + att_js + """;
document.getElementById("stats").innerHTML =
"<b>" + NODES.length + "</b> nodes &nbsp;·&nbsp; <b>" + SUP.length + "</b> SUP &nbsp;·&nbsp; <b>" + ATT.length + "</b> ATT";
const links = SUP.map(e => ({source:e.source, target:e.target, kind:"sup"}))
.concat(ATT.map(e => ({source:e.source, target:e.target, kind:"att"})));
const W = window.innerWidth, H = window.innerHeight - 50;
const svg = d3.select("#g").attr("viewBox", [0, 0, W, H]);
svg.append("defs").html(
'<marker id="ms" viewBox="0 -4 10 8" refX="20" refY="0" markerWidth="7" markerHeight="7" orient="auto">' +
'<path d="M0,-4L10,0L0,4Z" fill="#22c55e"/></marker>' +
'<marker id="ma" viewBox="0 -4 10 8" refX="20" refY="0" markerWidth="7" markerHeight="7" orient="auto">' +
'<path d="M0,-4L10,0L0,4Z" fill="#ef4444"/></marker>'
);
const g = svg.append("g");
svg.call(d3.zoom().scaleExtent([0.1, 5]).on("zoom", e => g.attr("transform", e.transform)));
const sim = d3.forceSimulation(NODES)
.force("link", d3.forceLink(links).id(d => d.id).distance(90).strength(0.5))
.force("charge", d3.forceManyBody().strength(-360))
.force("center", d3.forceCenter(W / 2, H / 2))
.force("collision", d3.forceCollide(22));
const lsel = g.append("g").selectAll("line").data(links).join("line")
.attr("class", d => d.kind === "sup" ? "link-sup" : "link-att")
.attr("marker-end", d => d.kind === "sup" ? "url(#ms)" : "url(#ma)");
const adj = {};
NODES.forEach(n => { adj[n.id] = {sf:[], st:[], af:[], at:[]}; });
SUP.forEach(e => { adj[e.target].sf.push(e.source); adj[e.source].st.push(e.target); });
ATT.forEach(e => { adj[e.target].af.push(e.source); adj[e.source].at.push(e.target); });
const tt = document.getElementById("tt");
const esc = s => String(s).replace(/[&<>"']/g, c => "&#" + c.charCodeAt(0) + ";");
const nsel = g.append("g").selectAll("g").data(NODES).join("g")
.attr("class", "ng")
.call(d3.drag()
.on("start", (e, d) => { if (!e.active) sim.alphaTarget(0.3).restart(); d.fx = d.x; d.fy = d.y; })
.on("drag", (e, d) => { d.fx = e.x; d.fy = e.y; })
.on("end", (e, d) => { if (!e.active) sim.alphaTarget(0); d.fx = null; d.fy = null; }))
.on("mouseover", (e, d) => {
const a = adj[d.id];
let h = "<b style='font-size:13px'>" + esc(d.id) + "</b>";
h += " <span style='color:#64748b'>" + esc(d.type) + "</span>";
if (d.scheme) h += " <span style='color:#94a3b8'>· " + esc(d.scheme) + "</span>";
if (d.atype) h += " <span style='color:#94a3b8'>· type:" + esc(d.atype) + "</span>";
h += "<hr style='border:none;border-top:1px solid #2a2f42;margin:5px 0'>";
if (d.text) h += "<div style='color:#94a3b8;font-size:10px;margin-bottom:5px;line-height:1.5'>" + esc(d.text) + (d.text.length >= 120 ? "…" : "") + "</div>";
if (a.sf.length) h += "<div style='color:#22c55e'>◀ SUP from: " + esc(a.sf.join(", ")) + "</div>";
if (a.st.length) h += "<div style='color:#22c55e'>▶ SUP to: " + esc(a.st.join(", ")) + "</div>";
if (a.af.length) h += "<div style='color:#ef4444'>◀ ATT from: " + esc(a.af.join(", ")) + "</div>";
if (a.at.length) h += "<div style='color:#ef4444'>▶ ATT to: " + esc(a.at.join(", ")) + "</div>";
if (!a.sf.length && !a.st.length && !a.af.length && !a.at.length)
h += "<span style='color:#64748b'>isolated node</span>";
tt.innerHTML = h;
tt.style.display = "block";
})
.on("mousemove", e => { tt.style.left = (e.clientX + 14) + "px"; tt.style.top = (e.clientY - 12) + "px"; })
.on("mouseleave", () => { tt.style.display = "none"; });
nsel.append("circle").attr("r", 14).attr("class", d => d.type === "prem" ? "cp" : "cc");
nsel.append("text").attr("class", "nl")
.style("font-size", d => d.id.length > 5 ? "6.5px" : "8px")
.text(d => d.id);
sim.on("tick", () => {
lsel.attr("x1", d => d.source.x).attr("y1", d => d.source.y)
.attr("x2", d => d.target.x).attr("y2", d => d.target.y);
nsel.attr("transform", d => "translate(" + d.x + "," + d.y + ")");
});
</script>
</body>
</html>"""
# ── Gradio handler ────────────────────────────────────────────────────────────
def process_file(file_obj):
    """Gradio handler: turn an uploaded .xml/.json file into a graph page.

    Args:
        file_obj: The value Gradio passes for the upload — an object with a
            ``name`` attribute holding the file path, a plain path string, or
            ``None`` when nothing is uploaded.

    Returns:
        ``(download_path, html)``. On success ``download_path`` is a temporary
        ``.html`` file containing the graph page and ``html`` is an
        ``<iframe srcdoc=…>`` embedding the same page. On any failure
        ``download_path`` is ``None`` and ``html`` is a red error message
        (including the traceback when parsing raised).
    """
    from html import escape
    import traceback

    def failure(message, tag="p"):
        style = "color:#ef4444;font-family:monospace;padding:20px"
        return None, f"<{tag} style='{style}'>{message}</{tag}>"

    if file_obj is None:
        return failure("No file uploaded.")

    path = getattr(file_obj, "name", file_obj)
    # splitext handles any extension case and only ever removes the suffix.
    stem, ext = os.path.splitext(os.path.basename(path))
    ext = ext.lower()
    try:
        if ext == ".xml":
            node_types, sup_edges, att_edges = parse_xml(path)
            doc_name = stem.replace("_", " ")
            node_meta = {
                nid: {"type": ntype, "text": "", "scheme": "", "atype": ""}
                for nid, ntype in node_types.items()
            }
        elif ext == ".json":
            node_types, sup_edges, att_edges, doc_name, node_meta = detect_and_parse(path)
        else:
            return failure("Upload .xml or .json")
    except Exception:
        # UI boundary: report the problem in the page instead of failing the
        # request. Escaped so frames such as "<module>" stay visible.
        return failure(escape(traceback.format_exc()), tag="pre")

    if not node_types:
        return failure("No nodes found in file.")

    page = build_graph_html(node_types, sup_edges, att_edges, doc_name, node_meta)
    # delete=False: the file has to outlive this call so it can be downloaded.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".html", mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(page)
    # The whole page travels inside an attribute value, so it is escaped once;
    # the browser decodes it again before loading the iframe document.
    srcdoc = escape(page, quote=True)
    iframe = f'<iframe srcdoc="{srcdoc}" style="width:100%;height:680px;border:none;border-radius:8px;"></iframe>'
    return tmp.name, iframe
# ── UI ────────────────────────────────────────────────────────────────────────
# Extra stylesheet handed to gr.Blocks(css=...): centres and styles the
# #title-row heading/paragraph and hides the page's <footer> element.
CSS = """
#title-row { text-align:center; padding:8px 0 4px; }
#title-row h1 { font-family:'IBM Plex Mono',monospace; font-size:1.25rem;
font-weight:700; color:#e2e8f0; }
#title-row p { color:#64748b; font-size:.85rem; margin-top:4px; }
footer { display:none !important; }
"""
# Dark colour scheme shared by the Gradio chrome and the embedded graph page.
_THEME = gr.themes.Base(primary_hue="blue", neutral_hue="slate").set(
    body_background_fill="#0d0f14",
    body_text_color="#e2e8f0",
    block_background_fill="#141720",
    block_border_color="#2a2f42",
    input_background_fill="#1c2030",
    button_primary_background_fill="#1d4ed8",
    button_primary_background_fill_hover="#2563eb",
    button_primary_text_color="#ffffff",
)

# Heading shown in the title row.
_TITLE_HTML = """
<h1>⚖ Argumentation Graph Visualizer</h1>
<p>Upload a Demosthenes <b>.xml</b> or pipeline result <b>.json</b>
→ interactive force-directed argument graph.</p>
"""

# Static legend shown below the upload controls.
_LEGEND_HTML = """
<div style="font-family:monospace;font-size:11px;color:#64748b;
border:1px solid #2a2f42;border-radius:8px;padding:12px;margin-top:6px;">
<div style="color:#e2e8f0;font-weight:600;margin-bottom:6px;">Legend</div>
<div style="display:flex;align-items:center;gap:8px;margin-bottom:4px;">
<svg width="34" height="8">
<line x1="0" y1="4" x2="27" y2="4" stroke="#22c55e" stroke-width="2"/>
<polygon points="27,0 34,4 27,8" fill="#22c55e"/>
</svg> SUP — support
</div>
<div style="display:flex;align-items:center;gap:8px;margin-bottom:4px;">
<svg width="34" height="8">
<line x1="0" y1="4" x2="27" y2="4" stroke="#ef4444"
stroke-width="2" stroke-dasharray="4 2"/>
<polygon points="27,0 34,4 27,8" fill="#ef4444"/>
</svg> ATT — attack
</div>
<div style="display:flex;align-items:center;gap:8px;margin-bottom:4px;">
<svg width="12" height="12">
<circle cx="6" cy="6" r="5" fill="#1e3a5f" stroke="#3b82f6" stroke-width="1.5"/>
</svg> prem (premise)
</div>
<div style="display:flex;align-items:center;gap:8px;">
<svg width="12" height="12">
<circle cx="6" cy="6" r="5" fill="#4c1d95" stroke="#a855f7" stroke-width="1.5"/>
</svg> conc (conclusion)
</div>
<div style="margin-top:8px;color:#475569;line-height:1.6;">
Accepts .xml · .json (inline tags) · .json (pipeline result)<br><br>
Hover node → see text snippet, type, scheme, relations
</div>
</div>
"""

# Placeholder occupying the graph panel until a file has been processed.
_PLACEHOLDER_HTML = (
    "<div style='height:680px;display:flex;align-items:center;"
    "justify-content:center;font-family:monospace;color:#475569;"
    "border:1px dashed #2a2f42;border-radius:10px;'>"
    "Upload a file and click <b style='color:#e2e8f0'> Build Graph </b></div>"
)

with gr.Blocks(title="Argumentation Graph Visualizer", theme=_THEME, css=CSS) as demo:
    with gr.Row(elem_id="title-row"):
        gr.HTML(_TITLE_HTML)

    with gr.Row():
        # Left column: upload, trigger button, download link and legend.
        with gr.Column(scale=1, min_width=260):
            file_input = gr.File(
                label="Upload XML or JSON",
                file_types=[".xml", ".json"],
            )
            run_btn = gr.Button("Build Graph", variant="primary", size="lg")
            dl_out = gr.File(label="Download graph HTML")
            gr.HTML(_LEGEND_HTML)

        # Right column: the rendered graph.
        with gr.Column(scale=3):
            graph_out = gr.HTML(value=_PLACEHOLDER_HTML)

    # Both pressing the button and changing the uploaded file rebuild the graph.
    for register in (run_btn.click, file_input.change):
        register(fn=process_file, inputs=file_input, outputs=[dl_out, graph_out])

if __name__ == "__main__":
    demo.launch()