# app.py
"""Relationship Helper: text -> (subject, relation, object) triples -> RDF.

Pipeline exposed through a small Gradio UI:

1. ``extract_triples`` runs a rule-based extractor over a spaCy parse.
2. ``to_turtle`` converts the (user-editable) triple table to RDF/Turtle.
3. ``turtle_to_pyvis_html`` renders the Turtle as an interactive PyVis graph.
"""
import hashlib
import html
import pathlib
import re
import tempfile
from typing import List, Tuple

import gradio as gr
import pandas as pd
import spacy
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import XSD


# ---- Load spaCy model (wheel installed via requirements.txt) ----
def load_nlp():
    """Return the ``en_core_web_sm`` spaCy pipeline.

    ``spacy.load`` is tried first; if the model was installed as a plain
    Python module rather than a linked model (``OSError``), the module is
    imported and loaded directly.
    """
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # If the model installs as a module instead of a link, fall back:
        import en_core_web_sm

        return en_core_web_sm.load()


NLP = load_nlp()

# ---- Simple, transparent extractor (rule-based) ----
RELATION_MAP = {
    "be": "IS_A",
    "work": "WORKS_WITH",
    "use": "USES",
    "lead": "LEADS",
    "found": "FOUNDED",
    "compete": "COMPETES_WITH",
    "acquire": "ACQUIRED",
    "partner": "PARTNERS_WITH",
}

_VERB_POS = frozenset({"VERB", "AUX"})
_SUBJECT_DEPS = frozenset({"nsubj", "nsubjpass"})
_OBJECT_DEPS = frozenset({"dobj", "attr", "oprd"})


def _norm_rel(lemma: str) -> str:
    """Map a verb lemma to a relation label (upper-cased lemma if unmapped)."""
    return RELATION_MAP.get(lemma, lemma.upper().replace(" ", "_"))


def _subtree_text(token) -> str:
    """Return the space-joined text of ``token``'s dependency subtree."""
    return " ".join(t.text for t in token.subtree).strip()


def extract_triples(text: str) -> List[Tuple[str, str, str, float]]:
    """Extract ``(subject, relation, object, confidence)`` tuples from text.

    Two rules are applied per sentence:

    * Rule 1 - a verb/auxiliary with a subject and an object. A direct
      object / attribute / object predicate is preferred; the object of a
      preposition is used only when no such child exists, so that
      "X acquired Y in 2006" yields Y rather than 2006.
    * Rule 2 - appositions, emitted as ``HAS_APPOSITION``.

    Duplicate (subject, relation, object) keys are dropped, keeping the
    first occurrence and its confidence.
    """
    doc = NLP(text)
    triples = []
    for sent in doc.sents:
        # Rule 1: verb + subject + object (incl. prep->pobj as a fallback)
        for token in sent:
            if token.pos_ not in _VERB_POS:
                continue
            subj = None
            direct_obj = None
            prep_obj = None
            for child in token.children:
                if child.dep_ in _SUBJECT_DEPS:
                    subj = child
                elif child.dep_ in _OBJECT_DEPS:
                    direct_obj = child
                elif child.dep_ == "prep":
                    for grandchild in child.children:
                        if grandchild.dep_ == "pobj":
                            prep_obj = grandchild
            obj = direct_obj if direct_obj is not None else prep_obj
            if subj is None or obj is None:
                continue
            # Copular "be" statements are scored slightly lower.
            conf = 0.75 if token.lemma_ != "be" else 0.7
            triples.append(
                (_subtree_text(subj), _norm_rel(token.lemma_), _subtree_text(obj), conf)
            )

        # Rule 2: appositions → HAS_APPOSITION
        for token in sent:
            if token.dep_ == "appos":
                triples.append(
                    (
                        _subtree_text(token.head),
                        "HAS_APPOSITION",
                        _subtree_text(token),
                        0.6,
                    )
                )

    # De-duplicate on (s, p, o); dict insertion order keeps the first hit.
    unique = {}
    for s, p, o, c in triples:
        unique.setdefault((s, p, o), (s, p, o, c))
    return list(unique.values())


# ---- Minimal RDF/Turtle export ----
# Characters rdflib rejects inside an IRI, plus whitespace/control characters.
_INVALID_IRI_CHARS = re.compile(r'[<>"{}|\\^`\x00-\x20]')


def _percent_encode(match) -> str:
    """Percent-encode the single character matched by ``_INVALID_IRI_CHARS``."""
    return f"%{ord(match.group()):02X}"


def to_turtle(df: pd.DataFrame, base_ns: str) -> str:
    """Serialize a triple table to RDF/Turtle.

    Args:
        df: table with ``subject``, ``relation`` and ``object`` columns.
        base_ns: namespace prepended to every non-URL value; bound to the
            ``ex`` prefix in the output.

    Returns:
        The Turtle document as ``str``.

    Rows containing a blank/NaN subject, relation or object are skipped.
    Objects spelled ``true``/``false`` or parseable as numbers become typed
    literals; everything else becomes an IRI.
    """
    g = Graph()
    g.bind("ex", Namespace(base_ns))

    def to_term(val: str):
        v = val.strip()
        if v.startswith(("http://", "https://")):
            return URIRef(_INVALID_IRI_CHARS.sub(_percent_encode, v))
        # Spaces become underscores (readable local names); any remaining
        # character that is illegal in an IRI is percent-encoded.
        local = _INVALID_IRI_CHARS.sub(_percent_encode, v.replace(" ", "_"))
        return URIRef(base_ns + local)

    for _, row in df.iterrows():
        cells = (row["subject"], row["relation"], row["object"])
        # The grid is user-editable, so half-filled rows are expected.
        if any(pd.isna(cell) or not str(cell).strip() for cell in cells):
            continue
        s, p, o = (str(cell).strip() for cell in cells)

        # try literal typing
        lit = None
        low = o.lower()
        if low in {"true", "false"}:
            lit = Literal(low == "true")
        else:
            try:
                if "." in o:
                    lit = Literal(float(o), datatype=XSD.float)
                else:
                    lit = Literal(int(o), datatype=XSD.integer)
            except ValueError:
                lit = None  # not numeric -> treated as a resource below

        o_term = lit if lit is not None else to_term(o)
        g.add((to_term(s), to_term(p), o_term))

    ttl = g.serialize(format="turtle")
    # Older rdflib releases return bytes from serialize().
    return ttl.decode("utf-8") if hasattr(ttl, "decode") else ttl


# ---------- NEW: RDF → rainbow graph (PyVis) ----------
# 12-color rainbow palette (hex)
_PALETTE = (
    "#e6194b", "#ffa31a", "#ffe119", "#bfef45", "#3cb44b", "#42d4f4",
    "#4363d8", "#911eb4", "#f032e6", "#a9a9a9", "#fabed4", "#ffd8b1",
)


def _rainbow_color(idx: int) -> str:
    """Return the palette color for ``idx`` (wraps around the palette)."""
    return _PALETTE[idx % len(_PALETTE)]


def _color_for_label(label: str) -> str:
    """Return a deterministic palette color derived from ``label``'s SHA-1."""
    digest = hashlib.sha1(label.encode("utf-8")).hexdigest()
    return _rainbow_color(int(digest, 16))


def turtle_to_pyvis_html(ttl_text: str):
    """Render Turtle text as an interactive PyVis graph.

    Returns an HTML fragment: an ``<iframe>`` embedding the generated graph
    page, or a short message when PyVis is missing, the input is empty, or
    the Turtle cannot be parsed.
    """
    try:
        from pyvis.network import Network
    except ImportError:
        # friendly guidance if pyvis isn't installed
        return (
            "<div>"
            "PyVis not installed. Add pyvis to requirements.txt to enable the interactive graph preview."
            "</div>"
        )

    if not ttl_text or not ttl_text.strip():
        return "<div>No Turtle to render. Export RDF first.</div>"

    g = Graph()
    try:
        g.parse(data=ttl_text, format="turtle")
    except Exception as e:  # UI boundary: report any parser failure as text
        return f"<div>Parse error: {html.escape(str(e))}</div>"

    net = Network(
        height="620px",
        width="100%",
        directed=True,
        notebook=False,
        cdn_resources="remote",
    )
    net.toggle_physics(True)

    def compact(term):
        # Literals are shown by value; normalizeUri would wrap them in <...>.
        if isinstance(term, Literal):
            return str(term)
        try:
            return g.namespace_manager.normalizeUri(term)
        except Exception:
            return str(term)

    nodes_seen = set()
    for s, p, o in g:
        sn, pn, on = compact(s), compact(p), compact(o)
        for node in (sn, on):
            if node not in nodes_seen:
                net.add_node(node, label=node, color=_color_for_label(node), shape="dot")
                nodes_seen.add(node)
        net.add_edge(sn, on, label=pn, color=_color_for_label(pn), arrows="to")

    # Build the page in memory and inline it: no temp files are left behind
    # and nothing tries to open a browser on the server.
    page = net.generate_html(notebook=False)
    return (
        '<iframe style="width:100%;height:660px;border:0;" '
        f'srcdoc="{html.escape(page, quote=True)}"></iframe>'
    )


# ---- Gradio UI (tiny) ----
EXAMPLE = (
    "Yas Etessam is a content architect who works with large enterprises. "
    "Hockey competes with Swimming. Yas uses knowledge graphs."
)

EMPTY_DF = pd.DataFrame(columns=["subject", "relation", "object", "confidence"])

with gr.Blocks(title="Relationship Helper — Minimal") as demo:
    gr.Markdown("# Relationship Helper — Minimal\nPaste text → get proposed (subject, relation, object) triples.\n")
    text_in = gr.Textbox(value=EXAMPLE, lines=6, label="Text")
    with gr.Row():
        btn = gr.Button("Extract", variant="primary")
        btn_clear = gr.Button("Clear")

    df = gr.Dataframe(
        value=EMPTY_DF,  # ✅ safe initial value to avoid SSR crash
        headers=["subject", "relation", "object", "confidence"],
        datatype=["str", "str", "str", "number"],
        interactive=True,
        row_count=(0, "dynamic"),
        label="Proposed relationships (editable)",
    )

    with gr.Row():
        base_ns = gr.Textbox(value="https://example.org/", label="Base namespace")
        btn_export = gr.Button("Export → Turtle")
    turtle_out = gr.Code(label="RDF/Turtle", value="")  # ✅ safe initial value

    # ---------- NEW: preview controls at the bottom ----------
    with gr.Row():
        btn_preview = gr.Button("Preview Graph 🌈")
    graph_html = gr.HTML(label="Knowledge Graph Preview (rainbow)")

    def on_extract(text: str):
        """Run the extractor and return the triples as a DataFrame."""
        rows = extract_triples(text) if text and text.strip() else []
        return pd.DataFrame(rows, columns=["subject", "relation", "object", "confidence"])

    def on_clear():
        """Reset the text box, the triple table and the Turtle output."""
        return "", EMPTY_DF.copy(), ""  # ✅ never return None

    def on_export(data: pd.DataFrame, base: str):
        """Serialize the current table to Turtle (placeholder when empty)."""
        if data is None or data.empty:
            return "# (No rows to export)"
        return to_turtle(data, base or "https://example.org/")

    def on_preview(ttl_text: str):
        """Render the current Turtle output as an interactive graph."""
        return turtle_to_pyvis_html(ttl_text)

    btn.click(on_extract, inputs=text_in, outputs=df)
    btn_clear.click(on_clear, outputs=[text_in, df, turtle_out])
    btn_export.click(on_export, inputs=[df, base_ns], outputs=turtle_out)

    # NEW: wire up preview
    btn_preview.click(on_preview, inputs=turtle_out, outputs=graph_html)

# For Hugging Face Spaces, expose on 0.0.0.0 and allow shareable link
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)