# Relationships / app.py
# yetessam's picture
# Update app.py
# 2c0f8d8 verified
# app.py
import gradio as gr
import pandas as pd
from typing import List, Tuple
import spacy
# ---- Load spaCy model (wheel installed via requirements.txt) ----
def load_nlp():
    """Return the loaded ``en_core_web_sm`` spaCy pipeline.

    ``spacy.load`` is tried first.  If it raises ``OSError``, the model is
    imported as the ``en_core_web_sm`` module and loaded through its own
    ``load()`` function instead.
    """
    try:
        pipeline = spacy.load("en_core_web_sm")
    except OSError:
        # The model may be installed as an importable module rather than as
        # a link spaCy can resolve by name.
        import en_core_web_sm

        pipeline = en_core_web_sm.load()
    return pipeline
# Module-level pipeline: loaded once at import time and reused by extract_triples().
NLP = load_nlp()
# ---- Simple, transparent extractor (rule-based) ----
# Verb lemma -> relation label for the verbs that get a hand-picked name.
RELATION_MAP = {
    "be": "IS_A",
    "work": "WORKS_WITH",
    "use": "USES",
    "lead": "LEADS",
    "found": "FOUNDED",
    "compete": "COMPETES_WITH",
    "acquire": "ACQUIRED",
    "partner": "PARTNERS_WITH",
}


def _norm_rel(lemma: str) -> str:
    """Return the relation label for a verb lemma.

    Lemmas listed in ``RELATION_MAP`` use their mapped label; any other lemma
    is upper-cased with spaces turned into underscores.
    """
    if lemma in RELATION_MAP:
        return RELATION_MAP[lemma]
    return lemma.upper().replace(" ", "_")
def extract_triples(text: str) -> List[Tuple[str, str, str, float]]:
    """Propose (subject, relation, object, confidence) triples for ``text``.

    Two rules run over every sentence of the parsed document:

    1. For each VERB/AUX token, pair its subject child (``nsubj`` /
       ``nsubjpass``) with an object child (``dobj`` / ``attr`` / ``oprd``,
       or the ``pobj`` under a ``prep`` child).  When several candidates
       exist, the last one visited wins.  Confidence is 0.7 for the lemma
       "be" and 0.75 otherwise.
    2. Each ``appos`` token yields a ``HAS_APPOSITION`` triple from its head
       to itself, with confidence 0.6.

    Subject and object text is the space-joined subtree of the chosen token.
    Duplicate (subject, relation, object) keys keep their first occurrence.
    """
    subject_deps = {"nsubj", "nsubjpass"}
    direct_object_deps = {"dobj", "attr", "oprd"}

    def phrase(node) -> str:
        # Full text of the token's dependency subtree.
        return " ".join(tok.text for tok in node.subtree).strip()

    doc = NLP(text)
    found = []
    for sentence in doc.sents:
        # Rule 1: verb + subject + object (incl. prep->pobj)
        for verb in sentence:
            if verb.pos_ not in {"VERB", "AUX"}:
                continue

            subjects = [kid for kid in verb.children if kid.dep_ in subject_deps]
            subject = subjects[-1] if subjects else None

            # Order matters here: a later child overrides an earlier one, and
            # a prepositional object overrides a direct object seen before it.
            target = None
            for kid in verb.children:
                if kid.dep_ in direct_object_deps:
                    target = kid
                if kid.dep_ == "prep":
                    for grandkid in kid.children:
                        if grandkid.dep_ == "pobj":
                            target = grandkid

            if subject is None or target is None:
                continue

            confidence = 0.7 if verb.lemma_ == "be" else 0.75
            found.append(
                (phrase(subject), _norm_rel(verb.lemma_), phrase(target), confidence)
            )

        # Rule 2: appositions → HAS_APPOSITION
        for node in sentence:
            if node.dep_ == "appos":
                found.append((phrase(node.head), "HAS_APPOSITION", phrase(node), 0.6))

    # De-duplicate on (subject, relation, object); dicts keep insertion order,
    # so the first confidence seen for a key is the one that survives.
    first_seen = {}
    for subj_text, relation, obj_text, score in found:
        first_seen.setdefault((subj_text, relation, obj_text), score)
    return [(s, r, o, c) for (s, r, o), c in first_seen.items()]
# ---- Minimal RDF/Turtle export ----
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import XSD
def to_turtle(df: pd.DataFrame, base_ns: str) -> str:
    """Serialize a table of (subject, relation, object) rows as RDF/Turtle.

    Subjects and relations always become IRIs.  An object becomes a boolean
    literal when it reads ``true``/``false`` (case-insensitive), an
    ``xsd:float`` when it contains a "." and parses as a float, an
    ``xsd:integer`` when it parses as an int, and an IRI otherwise.

    IRIs are built by prefixing ``base_ns`` (spaces become underscores) unless
    the value already starts with ``http://`` or ``https://``.  Characters
    that are not allowed inside an IRI are percent-encoded so that extracted
    text containing quotes, angle brackets, tabs or newlines still serializes.

    Rows whose subject, relation or object cell is empty, ``None`` or ``NaN``
    are skipped.

    Args:
        df: Table with ``subject``, ``relation`` and ``object`` columns; any
            other column is ignored.
        base_ns: Namespace IRI used as prefix for local names and bound to
            the ``ex`` prefix in the output.

    Returns:
        The Turtle document as text.
    """
    # Local import keeps the block self-contained (stdlib only).
    from urllib.parse import quote

    # Characters rdflib refuses inside an IRI when serializing to Turtle.
    invalid_iri_chars = frozenset('<>"{}|\\^`')

    def iri_safe(text: str) -> str:
        # Percent-encode only offending characters (plus whitespace/control
        # characters) so ordinary names keep their original spelling.
        return "".join(
            quote(ch, safe="") if ch in invalid_iri_chars or ch <= " " else ch
            for ch in text
        )

    def cell_text(value) -> str:
        # Editable grids can hand back None/NaN/NA for untouched cells.
        if value is None or value is pd.NA:
            return ""
        if isinstance(value, float) and value != value:  # NaN
            return ""
        return str(value).strip()

    def to_term(val: str):
        v = val.strip()
        if v.startswith(("http://", "https://")):
            return URIRef(iri_safe(v))
        return URIRef(base_ns + iri_safe(v.replace(" ", "_")))

    def to_literal(val: str):
        # Returns a typed Literal, or None when the value is not a literal.
        low = val.lower()
        if low in {"true", "false"}:
            return Literal(low == "true")
        try:
            if "." in val:
                return Literal(float(val), datatype=XSD.float)
            return Literal(int(val), datatype=XSD.integer)
        except ValueError:
            return None

    g = Graph()
    g.bind("ex", Namespace(base_ns))

    for raw_s, raw_p, raw_o in zip(df["subject"], df["relation"], df["object"]):
        s, p, o = cell_text(raw_s), cell_text(raw_p), cell_text(raw_o)
        if not (s and p and o):
            continue  # incomplete row: nothing meaningful to assert
        lit = to_literal(o)
        o_term = lit if lit is not None else to_term(o)
        g.add((to_term(s), to_term(p), o_term))

    ttl = g.serialize(format="turtle")
    # Older rdflib releases return bytes from serialize().
    return ttl.decode("utf-8") if hasattr(ttl, "decode") else ttl
# ---------- NEW: RDF → rainbow graph (PyVis) ----------
import hashlib, tempfile, pathlib, html
def _rainbow_color(idx: int) -> str:
    """Return the hex color at position ``idx`` of a fixed 12-color palette.

    The index wraps around, so any integer (including negatives) is valid.
    """
    palette = (
        "#e6194b", "#ffa31a", "#ffe119", "#bfef45", "#3cb44b", "#42d4f4",
        "#4363d8", "#911eb4", "#f032e6", "#a9a9a9", "#fabed4", "#ffd8b1",
    )
    slot = idx % len(palette)
    return palette[slot]
def _color_for_label(label: str) -> str:
    """Return a palette color chosen deterministically from ``label``.

    The SHA-1 digest of the UTF-8 encoded label is read as one big integer
    and handed to ``_rainbow_color``, so equal labels always get equal colors.
    """
    digest = hashlib.sha1(label.encode("utf-8")).digest()
    return _rainbow_color(int.from_bytes(digest, "big"))
def turtle_to_pyvis_html(ttl_text: str):
    """Render Turtle text as an interactive PyVis graph inside an iframe.

    Every triple becomes a directed edge labelled with its predicate; nodes
    and edges are colored deterministically from their labels.

    The PyVis page is generated in memory and embedded through the iframe's
    ``srcdoc`` attribute.  Nothing is written to disk, no browser is opened
    on the server, and the preview does not depend on a file-serving route.

    Args:
        ttl_text: Turtle document to visualize; ``None`` or blank text is
            treated as "nothing to render".

    Returns:
        An HTML snippet: the iframe on success, otherwise a short message
        (PyVis missing, no input, or Turtle parse error).
    """
    try:
        from pyvis.network import Network
    except ImportError:
        # friendly guidance if pyvis isn't installed
        return (
            "<div style='padding:12px;border:1px solid #eee'>"
            "<b>PyVis not installed.</b> Add <code>pyvis</code> to requirements.txt to enable the interactive graph preview."
            "</div>"
        )

    if not ttl_text or not ttl_text.strip():
        return "<div style='padding:12px;border:1px solid #eee'>No Turtle to render. Export RDF first.</div>"

    g = Graph()
    try:
        g.parse(data=ttl_text, format="turtle")
    except Exception as e:
        # Parser errors come in several types; show whatever was raised.
        return f"<pre style='white-space:pre-wrap;color:#b00'>Parse error: {html.escape(str(e))}</pre>"

    net = Network(height="620px", width="100%", directed=True, notebook=False, cdn_resources="remote")
    net.toggle_physics(True)

    def compact(term):
        # Prefer the prefixed form (e.g. ex:Thing); fall back to the raw text.
        try:
            return g.namespace_manager.normalizeUri(term)
        except Exception:
            return str(term)

    nodes_seen = set()
    for s, p, o in g:
        sn, pn, on = compact(s), compact(p), compact(o)
        for node_id in (sn, on):
            if node_id not in nodes_seen:
                net.add_node(node_id, label=node_id, color=_color_for_label(node_id), shape="dot")
                nodes_seen.add(node_id)
        net.add_edge(sn, on, label=pn, color=_color_for_label(pn), arrows="to")

    # Build the page as a string instead of net.show(), which writes a file
    # and is meant for notebooks / opening a local browser.
    page = net.generate_html(notebook=False)
    embedded = html.escape(page, quote=True)
    return f'<iframe srcdoc="{embedded}" style="width:100%;height:640px;border:0" allowfullscreen></iframe>'
# ---- Gradio UI (tiny) ----
# Sample text pre-filled in the input box; one literal per sentence.
EXAMPLE = (
    "Yas Etessam is a content architect who works with large enterprises. "
    "Hockey competes with Swimming. "
    "Yas uses knowledge graphs."
)

# Empty table with the four result columns; used as the grid's initial and
# cleared value.
EMPTY_DF = pd.DataFrame(
    columns=[
        "subject",
        "relation",
        "object",
        "confidence",
    ]
)
# UI layout and event wiring: text in -> editable triples -> Turtle -> graph preview.
with gr.Blocks(title="Relationship Helper — Minimal") as demo:
    gr.Markdown("# Relationship Helper — Minimal\nPaste text → get proposed (subject, relation, object) triples.\n")
    text_in = gr.Textbox(value=EXAMPLE, lines=6, label="Text")
    with gr.Row():
        btn = gr.Button("Extract", variant="primary")
        btn_clear = gr.Button("Clear")
    df = gr.Dataframe(
        value=EMPTY_DF,  # ✅ safe initial value to avoid SSR crash
        headers=["subject", "relation", "object", "confidence"],
        datatype=["str", "str", "str", "number"],
        interactive=True,
        row_count=(0, "dynamic"),
        label="Proposed relationships (editable)",
    )
    with gr.Row():
        base_ns = gr.Textbox(value="https://example.org/", label="Base namespace")
        btn_export = gr.Button("Export → Turtle")
    turtle_out = gr.Code(label="RDF/Turtle", value="")  # ✅ safe initial value

    # ---------- NEW: preview controls at the bottom ----------
    with gr.Row():
        btn_preview = gr.Button("Preview Graph 🌈")
    graph_html = gr.HTML(label="Knowledge Graph Preview (rainbow)")

    def on_extract(text: str):
        """Run the extractor and return its triples as a table."""
        # Treat None like blank text instead of crashing on .strip().
        rows = extract_triples(text) if text and text.strip() else []
        return pd.DataFrame(rows, columns=["subject", "relation", "object", "confidence"])

    def on_clear():
        """Reset the text box, the table, the Turtle output and the preview."""
        # ✅ never return None; the last "" wipes a stale graph preview too.
        return "", EMPTY_DF.copy(), "", ""

    def on_export(data: pd.DataFrame, base: str):
        """Convert the (possibly edited) table to Turtle text."""
        if data is None or data.empty:
            return "# (No rows to export)"
        return to_turtle(data, base or "https://example.org/")

    def on_preview(ttl_text: str):
        """Render the current Turtle text as an interactive graph."""
        return turtle_to_pyvis_html(ttl_text)

    btn.click(on_extract, inputs=text_in, outputs=df)
    btn_clear.click(on_clear, outputs=[text_in, df, turtle_out, graph_html])
    btn_export.click(on_export, inputs=[df, base_ns], outputs=turtle_out)
    # NEW: wire up preview
    btn_preview.click(on_preview, inputs=turtle_out, outputs=graph_html)
# Script entry point. For Hugging Face Spaces the server listens on all
# interfaces (0.0.0.0) at port 7860 and a shareable link is requested.
# NOTE(review): share=True asks Gradio for a public link — confirm that is
# wanted when this file is run outside Spaces.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)