""" Argumentation Graph Visualizer — Hugging Face Space Supports: - Demosthenes XML (.xml) - Structured JSON with inline / tags (.json, format A) - Pipeline result JSON with segment dicts (.json, format B) """ import re import os import json import tempfile import gradio as gr from lxml import etree # ── Parsers ─────────────────────────────────────────────────────────────────── def parse_xml(filepath): tree = etree.parse(filepath) root = tree.getroot() nodes, sup_edges, att_edges = {}, [], [] for elem in root.iter(): if elem.tag not in ("prem", "conc"): continue nid = elem.get("ID") if not nid: continue nodes[nid] = elem.tag.lower() for t in filter(None, elem.get("SUP", "").split("|")): sup_edges.append((nid, t.strip())) for t in filter(None, elem.get("ATT", "").split("|")): att_edges.append((nid, t.strip())) return nodes, sup_edges, att_edges def parse_json_inline(filepath): """Format A: sections with 'text' fields containing inline / XML tags.""" with open(filepath, "r", encoding="utf-8") as f: data = json.load(f) nodes, sup_edges, att_edges = {}, [], [] all_text = "" for sec in data.get("sections", {}).values(): all_text += sec.get("text", "") + "\n" tag_pat = re.compile(r'<(prem|conc)\s+([^>]*?)(?:/>|>.*?)', re.DOTALL) for m in tag_pat.finditer(all_text): tag, attrs = m.group(1), m.group(2) id_m = re.search(r'ID="([^"]+)"', attrs) if not id_m: continue nid = id_m.group(1) nodes[nid] = tag.lower() sup_m = re.search(r'SUP="([^"]+)"', attrs) if sup_m: for t in filter(None, sup_m.group(1).split("|")): sup_edges.append((nid, t.strip())) att_m = re.search(r'ATT="([^"]+)"', attrs) if att_m: for t in filter(None, att_m.group(1).split("|")): att_edges.append((nid, t.strip())) return nodes, sup_edges, att_edges, data.get("document", "") def _clean_id(raw_id): """Convert '[3]' -> '3', strip brackets.""" return raw_id.strip().strip("[]").strip() def parse_json_result(filepath): """ Format B: pipeline result JSON. Structure: { doc_name: { section_1: { segment1: {id, role, sup, att, text, ...} } } } IDs restart per section, so we prefix them: S1_3, S2_1, etc. """ with open(filepath, "r", encoding="utf-8") as f: data = json.load(f) doc_name = list(data.keys())[0] doc = data[doc_name] nodes, sup_edges, att_edges = {}, [], [] # Section index map: section_1 -> 1, section_2 -> 2, ... sec_index = {} for i, sec_k in enumerate(sorted(doc.keys(), key=lambda k: int(re.search(r'\d+', k).group())), 1): sec_index[sec_k] = i # First pass: register all nodes with prefixed IDs # node_map[sec_k][raw_id] = prefixed_id node_map = {} for sec_k, sec in doc.items(): si = sec_index[sec_k] node_map[sec_k] = {} for seg in sec.values(): raw_id = _clean_id(seg.get("id", "")) if not raw_id: continue role = seg.get("role", "PREM").upper() ntype = "conc" if role == "CONC" else "prem" pid = f"S{si}_{raw_id}" node_map[sec_k][raw_id] = pid # Store extra info for tooltip nodes[pid] = { "type": ntype, "text": seg.get("text", "")[:120], "scheme": seg.get("scheme", ""), "atype": seg.get("type", ""), } # Second pass: build edges using prefixed IDs for sec_k, sec in doc.items(): si = sec_index[sec_k] nmap = node_map[sec_k] for seg in sec.values(): raw_id = _clean_id(seg.get("id", "")) if not raw_id or raw_id not in nmap: continue src_pid = nmap[raw_id] sup_raw = seg.get("sup", "") if sup_raw: for t in sup_raw.split(","): t = _clean_id(t) if t and t in nmap: sup_edges.append((src_pid, nmap[t])) att_raw = seg.get("att", "") if att_raw: for t in att_raw.split(","): t = _clean_id(t) if t and t in nmap: att_edges.append((src_pid, nmap[t])) return nodes, sup_edges, att_edges, doc_name def parse_json_format_c(filepath): """ Format C: pipeline JSON with globally unique letter-prefixed IDs (A1, B2, G13). Structure: { doc_name: { section_N: { segmentN: {id, role, type, scheme, sup, text} } } } sup uses '|' separator. role is lowercase. No brackets in IDs. """ with open(filepath, "r", encoding="utf-8") as f: data = json.load(f) doc_name = list(data.keys())[0] doc = data[doc_name] nodes, sup_edges, att_edges = {}, [], [] for sec in doc.values(): for seg in sec.values(): nid = seg.get("id", "").strip() if not nid: continue role = seg.get("role", "prem").lower() ntype = "conc" if role == "conc" else "prem" nodes[nid] = { "type": ntype, "text": seg.get("text", "")[:120], "scheme": seg.get("scheme", ""), "atype": seg.get("type", ""), } for t in filter(None, seg.get("sup", "").split("|")): sup_edges.append((nid, t.strip())) for t in filter(None, seg.get("att", "").split("|")): att_edges.append((nid, t.strip())) return nodes, sup_edges, att_edges, doc_name def detect_and_parse(filepath): """Auto-detect JSON format (A/B/C) and dispatch to correct parser.""" with open(filepath, "r", encoding="utf-8") as f: data = json.load(f) if isinstance(data, dict) and len(data) == 1: inner = list(data.values())[0] if isinstance(inner, dict): first_sec = next(iter(inner.values()), None) if isinstance(first_sec, dict): first_seg = next(iter(first_sec.values()), None) if isinstance(first_seg, dict) and "id" in first_seg: raw_id = str(first_seg["id"]) # Format C: id like 'A1', 'B2' — letter-prefixed, no brackets if re.match(r'^[A-Za-z]+\d+', raw_id): nodes, sup_edges, att_edges, doc_name = parse_json_format_c(filepath) node_types = {k: v["type"] for k, v in nodes.items()} return node_types, sup_edges, att_edges, doc_name, nodes # Format B: id like '[1]' — bracket-wrapped integers, section-scoped if re.match(r'^\[\d+\]', raw_id): nodes, sup_edges, att_edges, doc_name = parse_json_result(filepath) node_types = {k: v["type"] for k, v in nodes.items()} return node_types, sup_edges, att_edges, doc_name, nodes # Format A: inline XML tags in text fields nodes, sup_edges, att_edges, doc_name = parse_json_inline(filepath) node_types = nodes node_meta = {k: {"type": v, "text": "", "scheme": "", "atype": ""} for k, v in nodes.items()} return node_types, sup_edges, att_edges, doc_name, node_meta # ── HTML builder ────────────────────────────────────────────────────────────── def build_graph_html(node_types, sup_edges, att_edges, doc_name="", node_meta=None): if node_meta is None: node_meta = {} nodes_js = json.dumps([ { "id": k, "type": v, "text": node_meta.get(k, {}).get("text", ""), "scheme": node_meta.get(k, {}).get("scheme", ""), "atype": node_meta.get(k, {}).get("atype", ""), } for k, v in node_types.items() ]) sup_js = json.dumps([{"source": s, "target": t} for s, t in sup_edges]) att_js = json.dumps([{"source": s, "target": t} for s, t in att_edges]) label = doc_name.replace("_", " ") if doc_name else "argumentation graph" return """

""" + label + """ / argumentation graph

SUP

ATT

prem

conc

drag · scroll · hover

""" # ── Gradio handler ──────────────────────────────────────────────────────────── def process_file(file_obj): if file_obj is None: return None, "

No file uploaded.

" path = file_obj.name ext = path.rsplit(".", 1)[-1].lower() try: if ext == "xml": nodes, sup_edges, att_edges = parse_xml(path) doc_name = os.path.basename(path).replace(".xml", "").replace("_", " ") node_meta = {k: {"type": v, "text": "", "scheme": "", "atype": ""} for k, v in nodes.items()} node_types = nodes elif ext == "json": node_types, sup_edges, att_edges, doc_name, node_meta = detect_and_parse(path) else: return None, "

Upload .xml or .json

" except Exception as ex: import traceback return None, f"

{traceback.format_exc()}

" if not node_types: return None, "

No nodes found in file.

" html = build_graph_html(node_types, sup_edges, att_edges, doc_name, node_meta) tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".html", mode="w", encoding="utf-8") tmp.write(html) tmp.close() srcdoc = html.replace("&", "&").replace('"', """) iframe = f'' return tmp.name, iframe # ── UI ──────────────────────────────────────────────────────────────────────── CSS = """ #title-row { text-align:center; padding:8px 0 4px; } #title-row h1 { font-family:'IBM Plex Mono',monospace; font-size:1.25rem; font-weight:700; color:#e2e8f0; } #title-row p { color:#64748b; font-size:.85rem; margin-top:4px; } footer { display:none !important; } """ with gr.Blocks( title="Argumentation Graph Visualizer", theme=gr.themes.Base(primary_hue="blue", neutral_hue="slate").set( body_background_fill="#0d0f14", body_text_color="#e2e8f0", block_background_fill="#141720", block_border_color="#2a2f42", input_background_fill="#1c2030", button_primary_background_fill="#1d4ed8", button_primary_background_fill_hover="#2563eb", button_primary_text_color="#ffffff", ), css=CSS, ) as demo: with gr.Row(elem_id="title-row"): gr.HTML("""

⚖ Argumentation Graph Visualizer

Upload a Demosthenes .xml or pipeline result .json → interactive force-directed argument graph.

""") with gr.Row(): with gr.Column(scale=1, min_width=260): file_input = gr.File(label="Upload XML or JSON", file_types=[".xml", ".json"]) run_btn = gr.Button("Build Graph", variant="primary", size="lg") dl_out = gr.File(label="Download graph HTML") gr.HTML("""

Legend

                 SUP — support
              
                 ATT — attack
              
                 prem (premise)
              
                 conc (conclusion)
              
                Accepts .xml · .json (inline tags) · .json (pipeline result)

                Hover node → see text snippet, type, scheme, relations

""") with gr.Column(scale=3): graph_out = gr.HTML( value="

"
                      "Upload a file and click  Build Graph 

" ) run_btn.click(fn=process_file, inputs=file_input, outputs=[dl_out, graph_out]) file_input.change(fn=process_file, inputs=file_input, outputs=[dl_out, graph_out]) if __name__ == "__main__": demo.launch()