Spaces:
Sleeping
Sleeping
| # app.py | |
| import gradio as gr | |
| import pandas as pd | |
| from typing import List, Tuple | |
| import spacy | |
| # ---- Load spaCy model (wheel installed via requirements.txt) ---- | |
def load_nlp():
    """Return the small English spaCy pipeline.

    ``spacy.load`` is tried first. If it raises ``OSError``, the model is
    imported as a regular Python package and loaded through its own
    ``load()`` function instead.
    """
    model_name = "en_core_web_sm"
    try:
        pipeline = spacy.load(model_name)
    except OSError:
        # The model may be installed as an importable module rather than a link.
        import en_core_web_sm as model_pkg

        pipeline = model_pkg.load()
    return pipeline


# Loaded once at import time and shared by every extraction call.
NLP = load_nlp()
| # ---- Simple, transparent extractor (rule-based) ---- | |
| RELATION_MAP = { | |
| "be": "IS_A", | |
| "work": "WORKS_WITH", | |
| "use": "USES", | |
| "lead": "LEADS", | |
| "found": "FOUNDED", | |
| "compete": "COMPETES_WITH", | |
| "acquire": "ACQUIRED", | |
| "partner": "PARTNERS_WITH", | |
| } | |
| def _norm_rel(lemma: str) -> str: | |
| return RELATION_MAP.get(lemma, lemma.upper().replace(" ", "_")) | |
def extract_triples(text: str) -> List[Tuple[str, str, str, float]]:
    """Extract ``(subject, relation, object, confidence)`` triples from *text*.

    The text is parsed with the module-level ``NLP`` pipeline and two rules
    are applied to every sentence:

    1. For each ``VERB``/``AUX`` token that has a subject child
       (``nsubj``/``nsubjpass``) and an object, emit
       ``(subject, _norm_rel(lemma), object)``. A direct object
       (``dobj``/``attr``/``oprd``) is preferred; the ``pobj`` of a ``prep``
       child is used only when the verb has no direct object.
    2. For each ``appos`` token, emit ``(head, "HAS_APPOSITION", appositive)``.

    Returns:
        The triples in order of first appearance, with duplicate
        ``(subject, relation, object)`` combinations removed.
    """

    def phrase(tokens) -> str:
        # Space-joined surface text of a token sequence.
        return " ".join(t.text for t in tokens).strip()

    doc = NLP(text)
    triples = []
    for sent in doc.sents:
        # Rule 1: verb + subject + object (prep->pobj only as a fallback).
        for token in sent:
            if token.pos_ not in {"VERB", "AUX"}:
                continue
            subj = None
            direct_obj = None
            prep_obj = None
            for ch in token.children:
                if ch.dep_ in {"nsubj", "nsubjpass"}:
                    subj = ch
                elif ch.dep_ in {"dobj", "attr", "oprd"}:
                    direct_obj = ch
                elif ch.dep_ == "prep":
                    for gc in ch.children:
                        if gc.dep_ == "pobj":
                            prep_obj = gc
            # A trailing prepositional phrase ("... in 2020") must not replace
            # the verb's real object.
            obj = direct_obj if direct_obj is not None else prep_obj
            if subj is None or obj is None:
                continue
            conf = 0.75 if token.lemma_ != "be" else 0.7
            triples.append(
                (phrase(subj.subtree), _norm_rel(token.lemma_), phrase(obj.subtree), conf)
            )

        # Rule 2: appositions -> HAS_APPOSITION.
        for token in sent:
            if token.dep_ != "appos":
                continue
            # The appositive is a descendant of its head, so it has to be
            # removed from the head's subtree or the subject would contain
            # the object text as well.
            appos_ids = {t.i for t in token.subtree}
            head_tokens = [t for t in token.head.subtree if t.i not in appos_ids]
            # Drop the separators (usually commas) left dangling at the edges.
            while head_tokens and head_tokens[-1].is_punct:
                head_tokens.pop()
            while head_tokens and head_tokens[0].is_punct:
                head_tokens.pop(0)
            subject_text = phrase(head_tokens)
            object_text = phrase(token.subtree)
            if subject_text and object_text:
                triples.append((subject_text, "HAS_APPOSITION", object_text, 0.6))

    # De-duplicate on (subject, relation, object), keeping the first hit.
    unique = {}
    for s, p, o, c in triples:
        unique.setdefault((s, p, o), (s, p, o, c))
    return list(unique.values())
| # ---- Minimal RDF/Turtle export ---- | |
| from rdflib import Graph, Namespace, URIRef, Literal | |
| from rdflib.namespace import XSD | |
def to_turtle(df: pd.DataFrame, base_ns: str) -> str:
    """Serialise the rows of *df* as RDF/Turtle text.

    Every row whose ``subject``, ``relation`` and ``object`` cells are all
    non-blank becomes one triple. Subjects and relations are always URIs.
    Objects are typed as a boolean, float or integer literal when they read
    as one, and are URIs otherwise.

    Args:
        df: Table with ``subject``, ``relation`` and ``object`` columns.
        base_ns: Namespace prepended to values that are not already
            ``http://`` / ``https://`` URIs; bound to the ``ex`` prefix.

    Returns:
        The Turtle document as ``str``.
    """
    g = Graph()
    EX = Namespace(base_ns)
    g.bind("ex", EX)

    # Characters that must not appear raw inside an IRI; percent-encode them
    # so free text taken from the table cannot produce an invalid URIRef.
    escape_table = str.maketrans({ch: f"%{ord(ch):02X}" for ch in '<>"{}|\\^`'})

    def is_blank(val) -> bool:
        # True for None, NaN/NA and empty or whitespace-only strings.
        if val is None:
            return True
        if isinstance(val, str):
            return not val.strip()
        return bool(pd.api.types.is_scalar(val) and pd.isna(val))

    def to_term(val: str):
        v = val.strip()
        if v.startswith(("http://", "https://")):
            return URIRef(v)
        # Any whitespace (not only spaces) becomes an underscore.
        local = "".join("_" if ch.isspace() else ch for ch in v)
        return URIRef(base_ns + local.translate(escape_table))

    for _, row in df.iterrows():
        cells = (row["subject"], row["relation"], row["object"])
        # Rows added in the editable table but left empty would otherwise be
        # exported as junk triples such as ``ex:None ex:None ex:None``.
        if any(is_blank(cell) for cell in cells):
            continue
        s, p, o = (str(cell).strip() for cell in cells)
        s_ref, p_ref = to_term(s), to_term(p)

        # Try literal typing; fall back to a URI when the object is not a
        # boolean or a number.
        low = o.lower()
        if low in {"true", "false"}:
            o_term = Literal(low == "true")
        else:
            try:
                number = float(o) if "." in o else int(o)
            except ValueError:
                o_term = to_term(o)
            else:
                datatype = XSD.float if isinstance(number, float) else XSD.integer
                o_term = Literal(number, datatype=datatype)
        g.add((s_ref, p_ref, o_term))

    ttl = g.serialize(format="turtle")
    # serialize() may hand back bytes; normalise to str either way.
    return ttl.decode("utf-8") if hasattr(ttl, "decode") else ttl
| # ---------- NEW: RDF → rainbow graph (PyVis) ---------- | |
| import hashlib, tempfile, pathlib, html | |
| def _rainbow_color(idx: int) -> str: | |
| # 12-color rainbow palette (hex) | |
| palette = [ | |
| "#e6194b","#ffa31a","#ffe119","#bfef45","#3cb44b","#42d4f4", | |
| "#4363d8","#911eb4","#f032e6","#a9a9a9","#fabed4","#ffd8b1" | |
| ] | |
| return palette[idx % len(palette)] | |
def _color_for_label(label: str) -> str:
    """Pick a palette colour for *label*; equal labels get equal colours."""
    # The SHA-1 digest of the UTF-8 bytes, read as one big-endian integer,
    # is the palette index (wrapped by _rainbow_color).
    digest = hashlib.sha1(label.encode("utf-8")).digest()
    return _rainbow_color(int.from_bytes(digest, "big"))
def turtle_to_pyvis_html(ttl_text: str):
    """Render Turtle text as an interactive PyVis graph inside an iframe.

    Args:
        ttl_text: RDF/Turtle document; ``None`` and blank text are accepted.

    Returns:
        An HTML snippet: a notice when PyVis is missing or there is nothing
        to render, a ``<pre>`` block with the parser message when the Turtle
        is invalid, otherwise an ``<iframe>`` carrying the whole graph page
        in its ``srcdoc`` attribute.
    """
    try:
        from pyvis.network import Network
    except ImportError:
        # Friendly guidance if pyvis isn't installed.
        msg = (
            "<div style='padding:12px;border:1px solid #eee'>"
            "<b>PyVis not installed.</b> Add <code>pyvis</code> to requirements.txt to enable the interactive graph preview."
            "</div>"
        )
        return msg

    if not ttl_text or not ttl_text.strip():
        return "<div style='padding:12px;border:1px solid #eee'>No Turtle to render. Export RDF first.</div>"

    g = Graph()
    try:
        g.parse(data=ttl_text, format="turtle")
    except Exception as e:  # shown to the user instead of crashing the UI
        return f"<pre style='white-space:pre-wrap;color:#b00'>Parse error: {html.escape(str(e))}</pre>"

    net = Network(height="620px", width="100%", directed=True, notebook=False, cdn_resources="remote")
    net.toggle_physics(True)

    def compact(term):
        # Literals are not URIs: label them by their lexical value rather
        # than sending them through URI normalisation.
        if isinstance(term, Literal):
            return str(term)
        try:
            return g.namespace_manager.normalizeUri(term)
        except Exception:
            return str(term)

    nodes_seen = set()
    for s, p, o in g:
        sn, pn, on = compact(s), compact(p), compact(o)
        for node in (sn, on):
            if node not in nodes_seen:
                net.add_node(node, label=node, color=_color_for_label(node), shape="dot")
                nodes_seen.add(node)
        net.add_edge(sn, on, label=pn, color=_color_for_label(pn), arrows="to")

    # Build the page in memory and inline it via srcdoc. This avoids leaving
    # a temp directory behind on every click, never tries to open a browser
    # on the server, and does not depend on the host app serving temp files.
    page = net.generate_html(notebook=False)
    return (
        f'<iframe srcdoc="{html.escape(page, quote=True)}" '
        'style="width:100%;height:640px;border:0" allowfullscreen></iframe>'
    )
| # ---- Gradio UI (tiny) ---- | |
# Sample text pre-filled in the input box.
EXAMPLE = " ".join(
    [
        "Yas Etessam is a content architect who works with large enterprises.",
        "Hockey competes with Swimming. Yas uses knowledge graphs.",
    ]
)

# Empty table with the expected columns; used as the initial and cleared value.
EMPTY_DF = pd.DataFrame(columns=["subject", "relation", "object", "confidence"])

# NOTE(review): the nesting below (which widgets sit inside each Row) was
# reconstructed from a whitespace-stripped copy of this file — confirm.
with gr.Blocks(title="Relationship Helper — Minimal") as demo:
    gr.Markdown("# Relationship Helper — Minimal\nPaste text → get proposed (subject, relation, object) triples.\n")
    text_in = gr.Textbox(value=EXAMPLE, lines=6, label="Text")

    with gr.Row():
        btn = gr.Button("Extract", variant="primary")
        btn_clear = gr.Button("Clear")

    # A concrete initial value (not None) is supplied for the table.
    df = gr.Dataframe(
        label="Proposed relationships (editable)",
        value=EMPTY_DF,
        headers=["subject", "relation", "object", "confidence"],
        datatype=["str", "str", "str", "number"],
        row_count=(0, "dynamic"),
        interactive=True,
    )

    with gr.Row():
        base_ns = gr.Textbox(value="https://example.org/", label="Base namespace")
        btn_export = gr.Button("Export → Turtle")

    # Same here: start from an empty string rather than None.
    turtle_out = gr.Code(label="RDF/Turtle", value="")

    # Preview controls at the bottom.
    with gr.Row():
        btn_preview = gr.Button("Preview Graph 🌈")
    graph_html = gr.HTML(label="Knowledge Graph Preview (rainbow)")

    def on_extract(text: str):
        """Return the extracted triples as a table; blank text gives no rows."""
        rows = []
        if text.strip():
            rows = extract_triples(text)
        return pd.DataFrame(rows, columns=["subject", "relation", "object", "confidence"])

    def on_clear():
        """Reset the text box, the table and the Turtle output (never None)."""
        return ("", EMPTY_DF.copy(), "")

    def on_export(data: pd.DataFrame, base: str):
        """Serialise the table to Turtle, or return a placeholder comment."""
        if data is not None and not data.empty:
            return to_turtle(data, base or "https://example.org/")
        return "# (No rows to export)"

    def on_preview(ttl_text: str):
        """Render the current Turtle text as the graph preview HTML."""
        return turtle_to_pyvis_html(ttl_text)

    # Event wiring.
    btn.click(fn=on_extract, inputs=text_in, outputs=df)
    btn_clear.click(fn=on_clear, outputs=[text_in, df, turtle_out])
    btn_export.click(fn=on_export, inputs=[df, base_ns], outputs=turtle_out)
    btn_preview.click(fn=on_preview, inputs=turtle_out, outputs=graph_html)

# Listen on all interfaces on port 7860 and request a share link.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
    )