Phani-ISB committed on
Commit
e76c3e2
·
0 Parent(s):

Initial commit

Browse files
Files changed (3) hide show
  1. DockerFile.txt +20 -0
  2. Graphs.py +172 -0
  3. requirements.txt.txt +6 -0
DockerFile.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python 3.11 slim image
FROM python:3.11-slim

# Set working directory inside container
WORKDIR /app

# Copy requirements first (for layer caching).
# The dependency list is committed as "requirements.txt.txt", so copy it under
# its real name and give it the conventional name inside the image; copying
# "requirements.txt" would fail because no such file exists in the build context.
COPY requirements.txt.txt requirements.txt

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy app code into container
COPY Graphs.py .

# Expose the default Streamlit port
EXPOSE 8501

# Run Streamlit app (headless, listening on all interfaces so the published
# port is reachable from outside the container)
CMD ["streamlit", "run", "Graphs.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true"]
Graphs.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Learn with Knowledge Graphs.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/16UX6wbUmaLG6YBJKzH5YouNYYnw2mL8H
8
+ """
9
+
10
+ # app.py
11
+ import streamlit as st
12
+ import wikipediaapi
13
+ import requests, json
14
+ import networkx as nx
15
+ import matplotlib.pyplot as plt
16
+ from neo4j import GraphDatabase
17
+
18
+ # ---------------------------
19
+ # CONFIGURATION
20
+ # ---------------------------
21
# Credentials must never be hard-coded in source. They are read from Streamlit
# secrets first and from environment variables second.
# NOTE: any key or password that was previously committed in this file is
# compromised and should be rotated with the provider.
def _get_secret(name, default=""):
    """Return the configuration value *name*, or *default* when it is unset.

    Args:
        name: Key to look up in ``st.secrets`` and then in ``os.environ``.
        default: Value returned when neither source defines *name*.

    Returns:
        The configured value, or *default*.
    """
    import os  # local import: only this helper needs it

    try:
        value = st.secrets.get(name)
    except FileNotFoundError:
        # st.secrets raises when no secrets.toml is present (for example in a
        # bare container); fall through to the environment instead of crashing.
        value = None
    return value or os.environ.get(name, default)


@st.cache_resource
def _get_driver(uri, user, password):
    """Create the Neo4j driver once and reuse it across Streamlit reruns."""
    return GraphDatabase.driver(uri, auth=(user, password))


# API key for Perplexity
PPLX_API_KEY = _get_secret("PPLX_API_KEY")

# Optional Neo4j credentials (leave unset if not using Neo4j)
NEO4J_URI = _get_secret("NEO4J_URI")
NEO4J_USER = _get_secret("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = _get_secret("NEO4J_PASSWORD")

# ``driver`` stays None when Neo4j is not configured; callers check it before use.
driver = None
if NEO4J_URI and NEO4J_USER and NEO4J_PASSWORD:
    driver = _get_driver(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
32
+
33
+
34
+ # ---------------------------
35
+ # FUNCTIONS
36
+ # ---------------------------
37
def perplexity_chat(prompt, model="sonar-medium-online"):
    """Send *prompt* to the Perplexity chat-completions API and return the reply.

    Args:
        prompt: User message sent as the only message of the conversation.
        model: Perplexity model identifier.

    Returns:
        The assistant's message content on success. On any failure (network
        error, non-200 status, unexpected response body) a string starting
        with "❌ Error" is returned instead of raising, so callers can show it
        directly.
    """
    url = "https://api.perplexity.ai/chat/completions"
    headers = {
        "Authorization": f"Bearer {PPLX_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0,
    }

    try:
        # Without a timeout a stalled connection would hang the app forever.
        resp = requests.post(url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        return f"❌ Error: {exc}"

    if resp.status_code != 200:
        return f"❌ Error {resp.status_code}: {resp.text}"

    try:
        return resp.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # A 200 response with an unexpected body should not crash the caller.
        return f"❌ Error: unexpected response format ({exc})"
52
+
53
+
54
def extract_triples_from_chunk(text, max_triples=5):
    """Ask the language model for triples found in *text* and parse its reply.

    Args:
        text: Passage to extract knowledge from.
        max_triples: Upper bound on the number of triples requested and returned.

    Returns:
        A list of ``(subject, predicate, object)`` string tuples, at most
        *max_triples* long. Empty when the model call failed or nothing in
        the reply could be parsed.
    """
    import re  # local import keeps this block self-contained

    prompt = (
        f"Extract up to {max_triples} subject-predicate-object triples\n"
        "from the text below. Return only triples in the format "
        "(subject, predicate, object).\n"
        "\n"
        f"Text: {text}"
    )

    content = perplexity_chat(prompt)
    # perplexity_chat reports failures as text; never parse an error message
    # as if it were knowledge.
    if content.startswith("❌ Error"):
        return []

    triples = []
    for raw_line in content.splitlines():
        # Drop list markers such as "1.", "2)", "-" or "*" that models often
        # prepend, then the surrounding brackets.
        line = re.sub(r"^\s*(?:[-*•]+|\d+[.)])\s*", "", raw_line).strip(" ()[]{}")
        if not line:
            continue
        parts = [part.strip() for part in line.split(",")]
        # Keep only well-formed triples: exactly three non-empty fields.
        if len(parts) == 3 and all(parts):
            triples.append(tuple(parts))

    # The model may return more than it was asked for.
    return triples[:max_triples]
70
+
71
+
72
def build_kg_from_wiki_title(
    title,
    lang="en",
    chunk_chars=800,
    max_triples_per_chunk=5,
    max_chunks=None,
    user_agent="KnowledgeGraphChatbot/1.0 (Streamlit demo app)",
):
    """Build a list of knowledge-graph triples from a Wikipedia article.

    The article text is cut into fixed-size character chunks and each chunk
    is sent to ``extract_triples_from_chunk``.

    Args:
        title: Wikipedia page title.
        lang: Wikipedia language edition code.
        chunk_chars: Number of characters per chunk.
        max_triples_per_chunk: Triple limit passed on for every chunk.
        max_chunks: Optional cap on the number of chunks processed, which
            bounds the number of model calls. ``None`` processes the whole
            article.
        user_agent: User-Agent string sent to Wikipedia.

    Returns:
        A list of ``(subject, predicate, object)`` tuples; empty when the
        page does not exist.
    """
    # wikipedia-api >= 0.6.0 takes ``user_agent`` as its first argument and
    # rejects very short values, so the language must be passed by keyword.
    wiki = wikipediaapi.Wikipedia(user_agent=user_agent, language=lang)
    page = wiki.page(title)
    if not page.exists():
        return []

    text = page.text
    chunks = [text[i:i + chunk_chars] for i in range(0, len(text), chunk_chars)]
    if max_chunks is not None:
        chunks = chunks[:max_chunks]

    triples = []
    for chunk in chunks:
        triples.extend(
            extract_triples_from_chunk(chunk, max_triples=max_triples_per_chunk)
        )
    return triples
87
+
88
+
89
def insert_triple(tx, subject, predicate, obj):
    """Merge a single triple into the graph using the transaction *tx*.

    Both endpoints become ``Entity`` nodes keyed by ``name``; the predicate is
    stored in the ``type`` property of a ``RELATION`` relationship. ``MERGE``
    is used throughout, so existing nodes and relationships are matched
    rather than duplicated.
    """
    cypher = """
        MERGE (s:Entity {name: $subject})
        MERGE (o:Entity {name: $object})
        MERGE (s)-[:RELATION {type: $predicate}]->(o)
        """
    query_params = {"subject": subject, "predicate": predicate, "object": obj}
    tx.run(cypher, **query_params)
98
+
99
def insert_triples(triples):
    """Write every triple to Neo4j, one write transaction per triple.

    Does nothing when the module-level ``driver`` is not configured.
    """
    if driver:
        with driver.session() as db_session:
            for triple in triples:
                subject, predicate, obj = triple
                db_session.execute_write(insert_triple, subject, predicate, obj)
105
+
106
+
107
def answer_with_kg(question, triples, top_k=10, model="sonar-medium-online"):
    """Answer *question* using the most relevant knowledge-graph triples.

    Triples are ranked by how many words they share with the question, and
    the best *top_k* are given to the model as context. The sort is stable,
    so triples with equal relevance keep their original order; when nothing
    overlaps, the first *top_k* triples are used.

    Args:
        question: The user's question.
        triples: Iterable of ``(subject, predicate, object)`` tuples.
        top_k: Number of triples placed in the prompt.
        model: Perplexity model identifier.

    Returns:
        The text returned by ``perplexity_chat``.
    """
    import re  # local import keeps this block self-contained

    def _terms(value):
        # Very short tokens ("is", "of", "a") match almost everything; skip them.
        return {w for w in re.findall(r"\w+", value.lower()) if len(w) > 2}

    question_terms = _terms(question)

    def _relevance(triple):
        return len(question_terms & _terms(" ".join(str(part) for part in triple)))

    ranked = sorted(triples, key=_relevance, reverse=True)
    context_triples = ranked[:top_k]
    context_str = "\n".join(f"({s}, {p}, {o})" for s, p, o in context_triples)

    prompt = (
        "\n"
        "You are a QA assistant.\n"
        "Use the following knowledge graph triples as context to answer the question.\n"
        "\n"
        "Knowledge Graph Triples:\n"
        f"{context_str}\n"
        "\n"
        f"Question: {question}\n"
        "\n"
        "Answer in a clear, concise way. If you don't find enough info in triples,\n"
        "say 'Not found in knowledge graph'.\n"
    )
    return perplexity_chat(prompt, model=model)
124
+
125
+
126
+ # ---------------------------
127
+ # STREAMLIT APP
128
+ # ---------------------------
129
st.title("📚 Knowledge Graph Chatbot (Wikipedia + Perplexity)")

# Input for Wikipedia Title
title = st.text_input("Enter a Wikipedia Title (e.g., Harry Potter):")

if title:
    # Streamlit re-runs this script on every widget interaction (including
    # typing a question below). Keep the triples for each title in session
    # state so the article is not re-fetched and re-sent to the model each time.
    if "kg_cache" not in st.session_state:
        st.session_state["kg_cache"] = {}
    kg_cache = st.session_state["kg_cache"]

    newly_built = title not in kg_cache
    if newly_built:
        st.write(f"🔍 Building Knowledge Graph for: **{title}** ...")
        built_triples = build_kg_from_wiki_title(title)
        # Only successful builds are remembered, so a failed attempt is retried.
        if built_triples:
            kg_cache[title] = built_triples
    triples = kg_cache.get(title, [])

    if triples:
        st.success(f"Extracted {len(triples)} triples ✅")

        # Save in Neo4j if configured (once per build, not on every rerun)
        if driver:
            if newly_built:
                insert_triples(triples)
            st.info("📡 Triples also stored in Neo4j.")

        # Show sample triples
        st.subheader("Sample Triples")
        st.json(triples[:10])

        # Visualization inside Streamlit
        st.subheader("Graph Visualization")
        G = nx.DiGraph()
        for s, p, o in triples[:30]:
            G.add_edge(s, o, label=p)

        # Draw on an explicit figure instead of the global pyplot state, and
        # seed the layout so the picture does not jump around between reruns.
        fig, ax = plt.subplots(figsize=(12, 8))
        pos = nx.spring_layout(G, k=0.5, seed=42)
        nx.draw(G, pos, ax=ax, with_labels=True, node_size=2500,
                node_color="lightblue", font_size=10, font_weight="bold",
                arrows=True)
        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels,
                                     font_size=8, ax=ax)
        st.pyplot(fig)
        plt.close(fig)  # release the figure; otherwise one leaks per rerun

        # Chat interface
        st.subheader("💬 Ask Questions")
        user_question = st.text_input("Your question:")
        if user_question:
            answer = answer_with_kg(user_question, triples)
            st.write("🤖", answer)
    else:
        st.error("Page not found or no triples extracted.")
172
+
requirements.txt.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit==1.36.0
2
+ wikipedia-api==0.6.0
3
+ requests==2.32.3
4
+ neo4j==5.22.0
5
+ networkx==3.3
6
+ matplotlib==3.9.2