# telgraf / app.py
# Uploaded by cihatyldz with huggingface_hub (commit 1f864dd, verified).
import gradio as gr
import json
import os
import re
import time
from collections import defaultdict
import networkx as nx
from community.community_louvain import best_partition
import plotly.graph_objects as go
import chromadb
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient, hf_hub_download
# ============================================================
# CONFIG
# ============================================================
# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Chat model id passed to the inference client on every completion request.
LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# Sentence-Transformers model used to embed chunks, questions and community summaries.
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
# Dataset repository the text documents and graph_data.json are downloaded from.
DATASET_REPO = "cihatyldz/telgraf-telconet-dataset"
# Both clients are created once at import time and shared by every query.
llm_client = InferenceClient(token=HF_TOKEN)
embed_model = SentenceTransformer(EMBEDDING_MODEL)
# ============================================================
# LOAD DATA
# ============================================================
# Paths (inside the dataset repository) of the text documents forming the corpus.
DOC_FILES = [
    "data/01_kurumsal_yapi.txt",
    "data/02_ag_altyapisi.txt",
    "data/03_ariza_kayitlari.txt",
    "data/04_sla_musteri.txt",
    "data/05_teknoloji_yol_haritasi.txt"
]


def _read_dataset_file(filename):
    """Download ``filename`` from DATASET_REPO and return its UTF-8 text content."""
    local_path = hf_hub_download(
        repo_id=DATASET_REPO,
        filename=filename,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    with open(local_path, "r", encoding="utf-8") as handle:
        return handle.read()


# Maps the bare file name (directory and ".txt" removed) to the document text.
documents = {
    doc_file.rsplit("/", 1)[-1].replace(".txt", ""): _read_dataset_file(doc_file)
    for doc_file in DOC_FILES
}

# Pre-computed knowledge graph: this file reads its "entities", "relationships",
# "partition" and "community_summaries" keys.
graph_data = json.loads(_read_dataset_file("graph_data.json"))
print(f"Loaded {len(documents)} documents, {len(graph_data['entities'])} entities")
# ============================================================
# CHUNKING & INDEX
# ============================================================
def semantic_chunk(text, max_chunk_size=600):
    """Greedily pack the non-empty lines of ``text`` into space-joined chunks.

    Each line is stripped and treated as one paragraph. Paragraphs are
    appended to the current chunk while the chunk length (counting one
    trailing space per paragraph already added) plus the next paragraph
    stays within ``max_chunk_size``; otherwise the current chunk is closed
    and a new one starts with that paragraph. A single paragraph longer
    than ``max_chunk_size`` is not split and becomes its own chunk.

    Args:
        text: Raw document text; lines are separated by "\\n".
        max_chunk_size: Soft upper bound on chunk length in characters.

    Returns:
        A list of chunk strings, empty when ``text`` has no non-blank line.
    """
    chunks = []
    pending = []       # paragraphs collected for the chunk being built
    pending_len = 0    # length of the pending text, one separator per paragraph
    for line in text.split("\n"):
        para = line.strip()
        if not para:
            continue
        if pending_len + len(para) > max_chunk_size:
            # The paragraph does not fit: flush what we have and start over.
            if pending:
                chunks.append(" ".join(pending))
            pending, pending_len = [], 0
        pending.append(para)
        pending_len += len(para) + 1
    if pending:
        chunks.append(" ".join(pending))
    return chunks
# Chunk every document; all_chunks and chunk_metadata are parallel lists.
all_chunks, chunk_metadata = [], []
for doc_name, doc_text in documents.items():
    doc_chunks = semantic_chunk(doc_text)
    all_chunks.extend(doc_chunks)
    chunk_metadata.extend(
        {"doc_name": doc_name, "chunk_id": f"{doc_name}_chunk_{idx}"}
        for idx, _ in enumerate(doc_chunks)
    )

# ChromaDB
chroma_client = chromadb.Client()
collection = chroma_client.create_collection(
    name="telconet",
    metadata={"hnsw:space": "cosine"},
)
# Embed all chunks in one call and store them under their chunk ids.
embeddings = embed_model.encode(all_chunks).tolist()
collection.add(
    documents=all_chunks,
    embeddings=embeddings,
    metadatas=chunk_metadata,
    ids=[meta["chunk_id"] for meta in chunk_metadata],
)
# ============================================================
# REBUILD GRAPH
# ============================================================
def normalize_name(name):
    """Return the canonical lookup key for an entity name.

    Leading/trailing whitespace is removed, every internal run of whitespace
    is collapsed to a single space, and the result is lower-cased, so that
    "Elif  Demir " and "elif demir" map to the same graph node.

    The previous implementation called ``replace(" ", " ")``, which replaces
    a space with a space and therefore never collapsed anything.

    NOTE(review): keys of the pre-computed ``partition`` must have been
    produced with a compatible normalization — confirm against the dataset
    builder if entity names can contain repeated spaces.

    Args:
        name: Entity name as it appears in the graph data.

    Returns:
        The normalized key string.
    """
    return " ".join(name.split()).lower()
# Index entities by normalized name; a later duplicate replaces an earlier one.
entity_map = {
    normalize_name(entity["name"]): entity
    for entity in graph_data["entities"]
}

# Undirected graph: one node per entity, one edge per resolvable relationship.
G = nx.Graph()
for node_key, entity in entity_map.items():
    G.add_node(node_key, label=entity["name"], type=entity.get("type", "UNKNOWN"))

for rel in graph_data["relationships"]:
    source_key = normalize_name(rel.get("source", ""))
    target_key = normalize_name(rel.get("target", ""))
    # Relationships pointing at unknown entities are skipped.
    if source_key not in entity_map or target_key not in entity_map:
        continue
    G.add_edge(source_key, target_key, relation=rel.get("relation", ""))

# Node key -> community id, restricted to nodes that exist in the graph.
partition = {
    node_key: int(community_id)
    for node_key, community_id in graph_data.get("partition", {}).items()
    if node_key in G
}
# Community id -> summary text, as stored in the dataset.
community_summaries = graph_data.get("community_summaries", {})
# ============================================================
# QUERY FUNCTIONS
# ============================================================
def llm_generate(prompt, max_tokens=500):
    """Send ``prompt`` as a single user message to LLM_MODEL and return the reply.

    Args:
        prompt: Full prompt text placed in the user message.
        max_tokens: Generation limit forwarded to the chat completion call.

    Returns:
        The ``content`` of the first choice's message.
    """
    request = {
        "model": LLM_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }
    completion = llm_client.chat_completion(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content
def standard_rag_query(question, top_k=5):
    """Answer ``question`` with plain vector-retrieval RAG.

    The question is embedded, the ``top_k`` nearest chunks are fetched from
    the Chroma collection, and the LLM is asked to answer in Turkish using
    only that context.

    Args:
        question: User question text.
        top_k: Number of chunks requested from the collection.

    Returns:
        A ``(answer, info)`` tuple. ``info`` lists the source documents in
        sorted order and the number of chunks that were actually retrieved
        (which can be lower than ``top_k`` if the collection returns fewer).
    """
    question_embedding = embed_model.encode([question]).tolist()
    results = collection.query(query_embeddings=question_embedding, n_results=top_k)
    # One query was sent, so index 0 holds its result lists.
    retrieved_chunks = results["documents"][0]
    context = "\n\n---\n\n".join(retrieved_chunks)
    prompt = f"""Aşağıdaki bağlam bilgilerini kullanarak soruyu Türkçe yanıtla.
Sadece bağlamda bulunan bilgileri kullan.
Bağlam:
{context}
Soru: {question}
Yanıt:"""
    answer = llm_generate(prompt)
    # Sorted so the info line is stable between identical queries.
    sources = sorted({meta["doc_name"] for meta in results["metadatas"][0]})
    info = f"Kaynak: {', '.join(sources)} | {len(retrieved_chunks)} chunk kullanıldı"
    return answer, info
def graphrag_query(question, top_k_comm=3, top_k_chunks=3):
    """Answer ``question`` using community summaries, graph relations and chunks.

    Steps:
      1. Score every community summary by cosine similarity to the question
         and keep the ``top_k_comm`` best.
      2. Collect the nodes of those communities plus their direct neighbours.
      3. Build a context from the selected summaries, up to 30 relations
         touching the collected nodes, and the ``top_k_chunks`` nearest text
         chunks.
      4. Ask the LLM to answer in Turkish from that context.

    Args:
        question: User question text.
        top_k_comm: Number of communities to include.
        top_k_chunks: Number of text chunks requested from the collection.

    Returns:
        A ``(answer, info)`` tuple; ``info`` names the chosen communities and
        reports how many nodes and relations were considered.
    """
    # The question is embedded once and reused for scoring and retrieval.
    q_emb = embed_model.encode([question])[0]

    # 1. Rank communities. All summaries are embedded in a single batch.
    comm_ids = list(community_summaries)
    comm_scores = {}
    if comm_ids:
        summary_embs = embed_model.encode([community_summaries[cid] for cid in comm_ids])
        q_norm = float((q_emb ** 2).sum() ** 0.5)
        for cid, s_emb in zip(comm_ids, summary_embs):
            denom = q_norm * float((s_emb ** 2).sum() ** 0.5)
            # A zero-length vector has no direction; score it 0 instead of dividing by zero.
            comm_scores[cid] = float(q_emb @ s_emb) / denom if denom else 0.0
    top_comms = sorted(comm_scores.items(), key=lambda item: -item[1])[:top_k_comm]

    # 2. Member nodes of the chosen communities plus their 1-hop neighbours.
    #    Summary keys are strings while partition values are ints.
    top_comm_ids = {int(cid) for cid, _ in top_comms}
    relevant_nodes = set()
    for node, comm in partition.items():
        if comm in top_comm_ids:
            relevant_nodes.add(node)
            if node in G:
                relevant_nodes.update(G.neighbors(node))

    # 3. Assemble the context.
    parts = [
        f"[Community {cid} - Skor: {score:.2f}]\n{community_summaries.get(str(cid), '')}"
        for cid, score in top_comms
    ]
    rels = []
    for u, v, data in G.edges(data=True):
        if u in relevant_nodes or v in relevant_nodes:
            src_name = entity_map.get(u, {}).get("name", u)
            tgt_name = entity_map.get(v, {}).get("name", v)
            # Separate relation label and target so they do not run together.
            rels.append(f"{src_name} → {data.get('relation', '?')} → {tgt_name}")
    if rels:
        # Cap the relation list to keep the prompt size bounded.
        parts.append("\nİlişkiler:\n" + "\n".join(rels[:30]))

    chunk_res = collection.query(query_embeddings=[q_emb.tolist()], n_results=top_k_chunks)
    for ch in chunk_res["documents"][0]:
        parts.append(f"\n[Metin]\n{ch}")

    # Joined outside the f-string: a backslash inside an f-string expression
    # is a syntax error on Python versions before 3.12.
    context = "\n\n".join(parts)
    prompt = f"""Bilgi grafiği ve bağlamı kullanarak soruyu Türkçe yanıtla. İlişkileri takip ederek multi-hop çıkarımlar yap.
Bağlam:
{context}
Soru: {question}
Yanıt:"""
    answer = llm_generate(prompt)
    info = f"Community: {[c[0] for c in top_comms]} | {len(relevant_nodes)} node, {len(rels)} ilişki"
    return answer, info
# ============================================================
# GRAPH VISUALIZATION
# ============================================================
def build_graph_figure():
    """Build the Plotly figure of the knowledge graph ``G``.

    Nodes are placed with a seeded spring layout, drawn as one scatter trace
    per community (so each community gets its own colour and legend entry),
    and sized by degree. Edges are drawn as a single light-grey line trace.

    Every node of ``G`` is drawn: a node without an entry in ``partition``
    (including the case where ``partition`` is empty) is shown under
    community 0 instead of being silently omitted.

    Returns:
        A ``plotly.graph_objects.Figure``; an empty figure when ``G`` has no
        nodes.
    """
    if G.number_of_nodes() == 0:
        return go.Figure()

    # Fixed seed keeps the layout identical between runs.
    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)
    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
              '#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9']

    # Edge segments are separated by None so Plotly draws them as distinct lines.
    edge_x, edge_y = [], []
    for u, v in G.edges():
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#ccc'),
        mode='lines', hoverinfo='none', showlegend=False,
    ))

    # Group nodes by community; unassigned nodes fall back to community 0.
    members = defaultdict(list)
    for node in G.nodes():
        members[partition.get(node, 0)].append(node)

    for cid in sorted(members):
        nodes = members[cid]
        names = [entity_map.get(n, {}).get("name", n) for n in nodes]
        types = [entity_map.get(n, {}).get('type', '?') for n in nodes]
        degrees = [G.degree(n) for n in nodes]
        fig.add_trace(go.Scatter(
            x=[pos[n][0] for n in nodes],
            y=[pos[n][1] for n in nodes],
            mode='markers+text',
            # Labels are truncated to keep the plot readable.
            text=[name[:18] for name in names],
            textposition="top center",
            textfont=dict(size=7),
            hovertext=[
                f"<b>{name}</b><br>Tür: {kind}<br>Bağlantı: {degree}"
                for name, kind, degree in zip(names, types, degrees)
            ],
            hoverinfo='text',
            marker=dict(
                # Marker size grows with degree, clamped to the range 8..35.
                size=[max(8, min(35, degree * 3)) for degree in degrees],
                color=colors[cid % len(colors)],
                line=dict(width=1, color='white'),
            ),
            name=f'Community {cid}',
        ))

    fig.update_layout(
        title='🔌 Telgraf — TelcoNet Knowledge Graph',
        showlegend=True,
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='white',
        height=600,
    )
    return fig
# Built once at import time; show_graph() returns this same figure object.
GRAPH_FIG = build_graph_figure()
# ============================================================
# GRADIO UI
# ============================================================
# Sample questions offered through gr.Examples in the comparison tab.
EXAMPLE_QUESTIONS = [
"TelcoNet'in CEO'su kimdir?",
"Bolu fiber kesintisinden hangi müşteriler etkilendi ve toplam tazminat ne kadar?",
"CTO Elif Demir'e bağlı departman müdürleri kimler ve hangi projelerin sponsorluğunu yapıyor?",
"Ericsson'un TelcoNet'teki tüm rolleri neler?",
"DDoS saldırısına kim müdahale etti ve sonrasında hangi projeler başlatıldı?",
"TelcoNet'in 2026 en büyük teknoloji riskleri ve başlatılan projeler nelerdir?"
]
def compare(question):
    """Run both pipelines on ``question`` and return their outputs side by side.

    Args:
        question: Text typed by the user; may be empty or ``None``.

    Returns:
        A 4-tuple ``(rag_answer, rag_info, graph_answer, graph_info)`` matching
        the four output components wired to the compare button. For an empty
        or whitespace-only question no retrieval or LLM call is made; both
        answer slots carry a short prompt to enter a question and both info
        slots are empty.
    """
    if not question or not question.strip():
        notice = "Lütfen bir soru girin."
        return notice, "", notice, ""
    rag_ans, rag_info = standard_rag_query(question)
    graph_ans, graph_info = graphrag_query(question)
    return rag_ans, rag_info, graph_ans, graph_info
def show_graph():
    """Return the pre-built knowledge-graph figure stored in GRAPH_FIG."""
    figure = GRAPH_FIG
    return figure
with gr.Blocks(title="🔌 Telgraf", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔌 Telgraf — GraphRAG vs Standart RAG")
    gr.Markdown("Telekom domain'inde Knowledge Graph tabanlı bilgi erişimi karşılaştırması")

    # Tab 1: ask one question and see both pipelines' answers side by side.
    with gr.Tab("⚔️ Karşılaştır"):
        question = gr.Textbox(
            label="Sorunuzu yazın",
            placeholder="Örn: Bolu fiber kesintisinin etkileri neler?",
            lines=2,
        )
        gr.Examples(examples=[[q] for q in EXAMPLE_QUESTIONS], inputs=question)
        btn = gr.Button("🔍 Karşılaştır", variant="primary")
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📘 Standart RAG")
                rag_answer = gr.Textbox(label="Cevap", lines=10)
                rag_meta = gr.Textbox(label="Detay", lines=2)
            with gr.Column():
                gr.Markdown("### 📗 GraphRAG")
                graph_answer = gr.Textbox(label="Cevap", lines=10)
                graph_meta = gr.Textbox(label="Detay", lines=2)
        # Output order must match the tuple returned by compare().
        btn.click(
            compare,
            inputs=question,
            outputs=[rag_answer, rag_meta, graph_answer, graph_meta],
        )

    # Tab 2: the graph figure is shown on demand.
    with gr.Tab("🕸️ Knowledge Graph"):
        gr.Markdown("### İnteraktif Bilgi Grafiği")
        graph_btn = gr.Button("📊 Grafiği Göster")
        graph_plot = gr.Plot()
        graph_btn.click(show_graph, outputs=graph_plot)

    # Tab 3: one collapsed, read-only accordion per source document.
    with gr.Tab("📄 Dokümanlar"):
        for doc_title, doc_body in documents.items():
            with gr.Accordion(doc_title, open=False):
                gr.Textbox(value=doc_body, lines=15, interactive=False)

    # Tab 4: static project description.
    with gr.Tab("ℹ️ Hakkında"):
        gr.Markdown("""
## 🔌 Telgraf Projesi
**GraphRAG vs Standart RAG** karşılaştırma demo'su.
**Kurgusal şirket:** TelcoNet A.Ş. (Türk telekom operatörü)
| Özellik | Standart RAG | GraphRAG |
|---|---|---|
| Basit sorular | ✅ İyi | ✅ İyi |
| Multi-hop | ❌ Zayıf | ✅ Güçlü |
| İlişki zinciri | ❌ Kuramıyor | ✅ Graph traversal |
| Global özet | ❌ Yetersiz | ✅ Community summaries |
**Tech:** Qwen2.5-7B, ChromaDB, NetworkX, Sentence-Transformers
**Geliştiren:** [Cihat Yıldız](https://huggingface.co/cihatyldz)
""")

demo.launch()