Update app.py
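In summary, the commit collapses the imports into a single line (adding `os`), makes `extract_graph` fold relation endpoints back into the entity list, completes the truncated `net.` call in `render_pyvis` with `net.write_html(path)` and inlined pyvis assets, adds a summarize/classify/NER section on top of `hf_inference`, rebuilds the UI into four tabs plus a question box, wires `submit_btn.click` to a `pipeline_all` function returning all eight outputs, and pins `demo.launch` to `0.0.0.0:7860`.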
app.py CHANGED
```diff
@@ -64,18 +64,13 @@


 # app.py
-
-import re
-import gradio as gr
-import tempfile
+# app.py
+import os, re, gradio as gr, tempfile, json, requests, numpy as np
 from collections import defaultdict, Counter
 from pyvis.network import Network
 from rdflib import Graph, Namespace, URIRef
 from rdflib.namespace import RDF
-import json
-import numpy as np
 from sentence_transformers import SentenceTransformer, util
-import requests

 # -----------------------------
 # Hugging Face Inference helper
```
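The hunk's trailing context is the `# Hugging Face Inference helper` header; the helper itself sits outside the diff and is unchanged. For orientation only, here is a minimal sketch of what an `hf_inference(model, payload, task=None)` helper matching the call sites below could look like; the endpoint, token handling, and response parsing are assumptions, not the Space's actual code:

```python
import json
import os

import requests

HF_API_URL = "https://api-inference.huggingface.co/models"  # hosted Inference API

def hf_inference(model, payload, task=None):
    # Hypothetical reconstruction of the helper hidden outside this diff.
    headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
    resp = requests.post(f"{HF_API_URL}/{model}", headers=headers,
                         json={"inputs": payload}, timeout=60)
    resp.raise_for_status()
    data = resp.json()
    if task == "token-classification":
        return json.dumps(data, indent=2)  # NER models return a list of entity dicts
    if isinstance(data, list) and data and isinstance(data[0], dict):
        # text2text/summarization responses look like [{"generated_text": ...}]
        # or [{"summary_text": ...}]
        return data[0].get("generated_text") or data[0].get("summary_text") or str(data[0])
    return str(data)
```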
```diff
@@ -109,28 +104,27 @@ RELATION_PATTERNS = [
 ]

 def extract_graph(text):
-    # Use HF NER model (CPU-friendly)
     entities = re.findall(ENTITY_REGEX, text)
     edges = []
     for pat, label in RELATION_PATTERNS:
         for m in pat.finditer(text):
             src, _, dst = m.groups()
             edges.append((src, dst, label))
-            entities.extend([src,dst])
+            entities.extend([src, dst])
     return sorted(set(entities)), edges

 def render_pyvis(entities, edges):
-    net = Network(height="400px", width="100%", directed=True, bgcolor="#111", font_color="#eee")
+    net = Network(height="400px", width="100%", directed=True, bgcolor="#111", font_color="#eee", notebook=False, cdn_resources="in_line")
     net.barnes_hut(gravity=-20000, central_gravity=0.3, spring_length=160, spring_strength=0.05)
     for e in entities:
         net.add_node(e, label=e, title=e)
-    for s,d,r in edges:
-        net.add_edge(s,d,title=r,label=r)
+    for s, d, r in edges:
+        net.add_edge(s, d, title=r, label=r)
     with tempfile.TemporaryDirectory() as td:
         path = f"{td}/graph.html"
-        net.
-        with open(path,"r",encoding="utf-8") as f:
-            html=f.read()
+        net.write_html(path)
+        with open(path, "r", encoding="utf-8") as f:
+            html = f.read()
     return html

 # -----------------------------
```
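`net.write_html(path)` completes the previously truncated `net.` call, and `cdn_resources="in_line"` makes pyvis embed its JS/CSS into the generated page instead of referencing a CDN, which is what lets the returned string render inside a sandboxed `gr.HTML` component. A quick sanity check of the fixed path; `ENTITY_REGEX` and `RELATION_PATTERNS` are defined above this hunk in app.py, so the stand-ins below exist only to make the snippet self-contained:

```python
import re

# Illustrative stand-ins; the real definitions live earlier in app.py (not in this diff):
ENTITY_REGEX = r"\b[A-Z][a-zA-Z]+\b"
RELATION_PATTERNS = [
    (re.compile(r"\b([A-Z][a-zA-Z]+) (works at) ([A-Z][a-zA-Z]+)\b"), "works_at"),
]

ents, rels = extract_graph("Alice works at Acme. Bob works at Acme.")
# with the stand-ins above: ents == ['Acme', 'Alice', 'Bob'],
# rels == [('Alice', 'Acme', 'works_at'), ('Bob', 'Acme', 'works_at')]
html = render_pyvis(ents, rels)  # self-contained HTML string, safe to feed to gr.HTML
```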
```diff
@@ -140,7 +134,6 @@ ORG = Namespace("http://example.org/org/")
 REL = Namespace("http://example.org/rel/")
 LOC = Namespace("http://example.org/loc/")
 PER = Namespace("http://example.org/per/")
-
 PATTERNS = RELATION_PATTERNS

 def triples_from_text(text):
```
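`triples_from_text` itself is unchanged and not shown. For readers new to rdflib, the namespaces above mint URIs by indexing, so a triple of the shape that function presumably emits looks like this (the entity names here are made up):

```python
from rdflib import Graph, Namespace

PER = Namespace("http://example.org/per/")
REL = Namespace("http://example.org/rel/")
ORG = Namespace("http://example.org/org/")

g = Graph()
g.add((PER["Alice"], REL["works_at"], ORG["Acme"]))  # URIRefs via namespace indexing
print(g.serialize(format="turtle"))
```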
```diff
@@ -203,50 +196,76 @@ def hybrid_search(query, k=5):

 def synthesize_answer(query, hits):
     context="\n".join(ch for ch,_ in hits)
-    # Use small HF LLM for answer synthesis
     ans = hf_inference("google/flan-t5-small", f"Answer the question based on context:\nContext: {context}\nQuestion: {query}")
     return ans

+# -----------------------------
+# 4) NLP Pipeline (Summarize, Classify, NER)
+# -----------------------------
+def summarize_text(text):
+    return hf_inference("facebook/bart-large-cnn", text)
+
+def classify_text(text):
+    return hf_inference("facebook/bart-large-mnli", f"Classify: {text}")
+
+def ner_text(text):
+    return hf_inference("dslim/bert-base-NER", text, task="token-classification")
+
 # -----------------------------
 # Gradio UI
 # -----------------------------
 with gr.Blocks(title="All-in-One Graph Demo") as demo:
-    gr.Markdown("# All 4 Graph Demos in One")
-
+    gr.Markdown("# 🧠 All 4 Graph + NLP Demos in One")
+
     inp_text = gr.Textbox(lines=10, label="Paste your paragraph")
+    question = gr.Textbox(label="Ask a question (for GraphRAG QA)")
     submit_btn = gr.Button("Process Everything")
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    with gr.Tab("GraphGPT"):
+        graph_html = gr.HTML()
+
+    with gr.Tab("KnowledgeGraphGPT"):
+        out_ttl = gr.Code(label="RDF Turtle")
+        out_jsonld = gr.Code(label="RDF JSON-LD")
+
+    with gr.Tab("GraphRAG"):
+        stats = gr.Markdown()
+        answer = gr.Textbox(label="Answer")
+
+    with gr.Tab("NLP Pipeline"):
+        summary_out = gr.Textbox(label="Summarization")
+        classify_out = gr.Textbox(label="Classification")
+        ner_out = gr.Textbox(label="NER")
+
     def pipeline_all(text, q):
-        # GraphGPT
+        # 1) GraphGPT
         ents, rels = extract_graph(text)
         html_graph = render_pyvis(ents, rels)
-
+
+        # 2) KnowledgeGraphGPT
         g = triples_from_text(text)
         ttl = g.serialize(format="turtle")
         jsonld = g.serialize(format="json-ld", indent=2)
-
-
-        texts_store.append(text)
+
+        # 3) GraphRAG
+        texts_store.clear(); texts_store.append(text)
         build_index(texts_store)
-        hits = hybrid_search(q)
-        ans = synthesize_answer(q, hits)
+        hits = hybrid_search(q if q.strip() else "What is this text about?")
+        ans = synthesize_answer(q if q.strip() else "What is this text about?", hits)
         stats_txt = f"GraphRAG Chunks: {len(chunks)}, Graph edges: {len(edges)}"
-
-
-
-
+
+        # 4) NLP Pipeline
+        summary = summarize_text(text)
+        classification = classify_text(text)
+        entities = ner_text(text)
+
+        return html_graph, ttl, jsonld, stats_txt, ans, summary, classification, entities
+
+    submit_btn.click(
+        pipeline_all,
+        inputs=[inp_text, question],
+        outputs=[graph_html, out_ttl, out_jsonld, stats, answer, summary_out, classify_out, ner_out]
+    )

 if __name__=="__main__":
-    demo.launch()
+    demo.launch(server_name="0.0.0.0", server_port=7860)
```
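Binding to `0.0.0.0:7860` matches what a Hugging Face Space container expects from a Gradio app. Below is a hedged smoke test for the rewired pipeline; it assumes the unchanged parts of app.py (`texts_store`, `build_index`, `hybrid_search`, `chunks`, `edges`, `hf_inference`) are loaded and an inference token is configured, so treat it as a sketch rather than a guaranteed-green test:

```python
# Hypothetical smoke test, run inside the Space's Python environment:
sample = "Alice works at Acme. Acme is located in Berlin."
outs = pipeline_all(sample, "Where is Acme located?")
assert len(outs) == 8  # graph HTML, Turtle, JSON-LD, stats, answer, summary, classification, NER
html_graph, ttl, jsonld, stats_txt, ans, summary, classification, entities = outs
print(stats_txt)
print(ans)
```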