jaibadachiya committed on
Commit
c641539
·
verified ·
1 Parent(s): 6384008

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -24
app.py CHANGED
@@ -1,4 +1,5 @@
1
  # app.py
 
2
  import streamlit as st
3
  import spacy
4
  import subprocess
@@ -13,20 +14,27 @@ def install_spacy_model():
13
  spacy.load("en_core_web_sm")
14
  except OSError:
15
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
 
 
16
  install_spacy_model()
17
 
18
  # Load spaCy model
19
  nlp = spacy.load("en_core_web_sm")
20
 
21
  # === Neo4j credentials ===
22
- uri = "neo4j+s://ff701b1c.databases.neo4j.io"
23
- username = "neo4j"
24
- password = "BfZM7YRKpFz1b_V7acAmOtaSQHPU9xK03rJlfPep88g"
25
 
26
- # Connect to Neo4j
27
- driver = GraphDatabase.driver(uri, auth=(username, password))
 
 
 
 
 
28
 
29
- # === TF-IDF Filtering ===
30
  def compute_tfidf_keywords(text: str, top_n=100):
31
  vectorizer = TfidfVectorizer(stop_words='english')
32
  X = vectorizer.fit_transform([text])
@@ -34,10 +42,10 @@ def compute_tfidf_keywords(text: str, top_n=100):
34
  sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)
35
  return {word for word, _ in sorted_scores[:top_n]}
36
 
37
- # === Enhanced Triple Extraction ===
38
- def extract_triples(text):
39
  doc = nlp(text)
40
- tfidf_keywords = compute_tfidf_keywords(text)
41
  triples = []
42
 
43
  for sent in doc.sents:
@@ -51,44 +59,56 @@ def extract_triples(text):
51
  verb = root[0].lemma_
52
 
53
  for chunk in noun_chunks:
54
- if chunk.root.dep_ in ("nsubj", "nsubjpass"):
55
  subject = chunk.text
56
- elif chunk.root.dep_ in ("dobj", "pobj", "attr"):
57
  obj = chunk.text
58
 
59
  if subject and verb and obj:
60
- triples.append((subject.strip(), verb.strip(), obj.strip()))
 
 
 
 
61
 
62
  return triples
63
 
64
  # === Visualization Function ===
65
  def show_graph(triples):
 
 
 
 
66
  G = nx.DiGraph()
67
  for s, p, o in triples:
68
  G.add_node(s)
69
  G.add_node(o)
70
  G.add_edge(s, o, label=p)
71
- pos = nx.spring_layout(G)
 
72
  plt.figure(figsize=(10, 8))
73
- nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=2000, font_size=10)
74
  nx.draw_networkx_edge_labels(G, pos, edge_labels={(u, v): d['label'] for u, v, d in G.edges(data=True)})
75
- st.pyplot(plt)
 
76
 
77
  # === Streamlit UI ===
78
- st.title("🧠 Knowledge Graph Generator:")
79
 
80
- text_input = st.text_area("Paste your text here", height=200)
 
81
 
82
  if st.button("Generate Graph"):
83
  if text_input:
84
- all_triples = extract_triples(text_input)
85
 
86
- # Display only the first 10
87
- st.write("### Extracted Triples (showing top 10)")
88
- for t in all_triples[:10]:
89
- st.write("🔗", t)
90
 
91
- # Visualize all triples
92
- show_graph(all_triples)
 
93
  else:
94
  st.warning("Please enter some text.")
 
1
  # app.py
2
+
3
  import streamlit as st
4
  import spacy
5
  import subprocess
 
14
  spacy.load("en_core_web_sm")
15
  except OSError:
16
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
17
+ spacy.load("en_core_web_sm")
18
+
19
  install_spacy_model()
20
 
21
  # Load spaCy model
22
  nlp = spacy.load("en_core_web_sm")
23
 
24
  # === Neo4j credentials ===
25
import os  # only needed for the credential lookup below

# Connection settings are read from the environment so that the secret never
# lives in source control. URI and username keep their previous values as
# defaults; the password has no default on purpose.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://ff701b1c.databases.neo4j.io")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")


def get_neo4j_driver():
    """Create a Neo4j driver and confirm the server is reachable.

    Returns:
        The connected driver, or ``None`` when the password is not configured
        or the connection attempt fails. In both failure cases an error is
        shown in the Streamlit page via ``st.error``.
    """
    if not NEO4J_PASSWORD:
        st.error("Failed to connect to Neo4j: NEO4J_PASSWORD environment variable is not set")
        return None

    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
        # Constructing the driver alone does not prove the server is
        # reachable; verify now so failures are reported here rather than on
        # the first query.
        driver.verify_connectivity()
        return driver
    except Exception as e:
        # Release the half-initialised driver before reporting the failure.
        if driver is not None:
            driver.close()
        st.error(f"Failed to connect to Neo4j: {e}")
        return None
36
 
37
+ # === TF-IDF Filtering (Optional for noise reduction) ===
38
  def compute_tfidf_keywords(text: str, top_n=100):
39
  vectorizer = TfidfVectorizer(stop_words='english')
40
  X = vectorizer.fit_transform([text])
 
42
  sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)
43
  return {word for word, _ in sorted_scores[:top_n]}
44
 
45
+ # === Triple Extraction ===
46
+ def extract_triples(text, use_tfidf=False):
47
  doc = nlp(text)
48
+ tfidf_keywords = compute_tfidf_keywords(text) if use_tfidf else None
49
  triples = []
50
 
51
  for sent in doc.sents:
 
59
  verb = root[0].lemma_
60
 
61
  for chunk in noun_chunks:
62
+ if chunk.root.dep_ in ("nsubj", "nsubjpass") and not subject:
63
  subject = chunk.text
64
+ elif chunk.root.dep_ in ("dobj", "pobj", "attr") and not obj:
65
  obj = chunk.text
66
 
67
  if subject and verb and obj:
68
+ if tfidf_keywords:
69
+ if subject.lower() in tfidf_keywords or obj.lower() in tfidf_keywords:
70
+ triples.append((subject.strip(), verb.strip(), obj.strip()))
71
+ else:
72
+ triples.append((subject.strip(), verb.strip(), obj.strip()))
73
 
74
  return triples
75
 
76
  # === Visualization Function ===
77
  def show_graph(triples):
78
+ if not triples:
79
+ st.warning("No triples found to visualize.")
80
+ return
81
+
82
  G = nx.DiGraph()
83
  for s, p, o in triples:
84
  G.add_node(s)
85
  G.add_node(o)
86
  G.add_edge(s, o, label=p)
87
+
88
+ pos = nx.spring_layout(G, seed=42) # fixed layout
89
  plt.figure(figsize=(10, 8))
90
+ nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=2000, font_size=10, edge_color='gray')
91
  nx.draw_networkx_edge_labels(G, pos, edge_labels={(u, v): d['label'] for u, v, d in G.edges(data=True)})
92
+ st.pyplot(plt.gcf())
93
+ plt.clf()
94
 
95
  # === Streamlit UI ===
96
# Page heading and input widgets.
st.title("🧠 Knowledge Graph Generator")

text_input = st.text_area("Paste your text here:", height=200)
use_tfidf = st.checkbox("Use TF-IDF filtering (Optional: Recommended for large texts)")

if st.button("Generate Graph"):
    # Whitespace-only input is treated like an empty box so it never reaches
    # the NLP pipeline.
    if text_input and text_input.strip():
        all_triples = extract_triples(text_input, use_tfidf=use_tfidf)

        if all_triples:
            st.subheader("🔗 Extracted Triples:")
            for subj, pred, obj in all_triples:
                st.markdown(f"- **({subj} → {pred} → {obj})**")

            show_graph(all_triples)
        else:
            st.warning("No valid triples could be extracted. Try different text.")
    else:
        st.warning("Please enter some text.")