"""Telgraf: Gradio demo comparing standard RAG with GraphRAG on the TelcoNet dataset.

At import time the module downloads the text documents and a prebuilt
knowledge graph from the Hugging Face Hub, builds an in-memory ChromaDB
index over the document chunks, rebuilds the graph with NetworkX and
assembles the Gradio UI. The server is only started when the file is run
as a script.
"""

import json
import os
import re  # unused in this part of the file; kept in case other code relies on it
import time  # unused in this part of the file; kept in case other code relies on it
from collections import defaultdict

import chromadb
import gradio as gr
import networkx as nx
import plotly.graph_objects as go
from community.community_louvain import best_partition  # unused here; kept
from huggingface_hub import InferenceClient, hf_hub_download
from sentence_transformers import SentenceTransformer

# ============================================================
# CONFIG
# ============================================================
HF_TOKEN = os.environ.get("HF_TOKEN")
LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
DATASET_REPO = "cihatyldz/telgraf-telconet-dataset"

llm_client = InferenceClient(token=HF_TOKEN)
embed_model = SentenceTransformer(EMBEDDING_MODEL)

# ============================================================
# LOAD DATA
# ============================================================
DOC_FILES = [
    "data/01_kurumsal_yapi.txt",
    "data/02_ag_altyapisi.txt",
    "data/03_ariza_kayitlari.txt",
    "data/04_sla_musteri.txt",
    "data/05_teknoloji_yol_haritasi.txt",
]

# Maps the document's base file name (without ".txt") to its full text.
documents = {}
for doc_file in DOC_FILES:
    doc_path = hf_hub_download(
        repo_id=DATASET_REPO,
        filename=doc_file,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    doc_key = doc_file.split("/")[-1].replace(".txt", "")
    with open(doc_path, "r", encoding="utf-8") as fp:
        documents[doc_key] = fp.read()

graph_path = hf_hub_download(
    repo_id=DATASET_REPO,
    filename="graph_data.json",
    repo_type="dataset",
    token=HF_TOKEN,
)
with open(graph_path, "r", encoding="utf-8") as fp:
    graph_data = json.load(fp)

print(f"Loaded {len(documents)} documents, {len(graph_data['entities'])} entities")


# ============================================================
# CHUNKING & INDEX
# ============================================================
def semantic_chunk(text, max_chunk_size=600):
    """Split *text* into chunks by greedily packing non-empty lines.

    Lines are stripped and joined with single spaces. A new chunk is started
    when adding the next line would push the running buffer past
    ``max_chunk_size`` characters. A single line longer than the limit is
    kept whole as its own chunk rather than being cut.

    Args:
        text: Raw document text; lines are separated by ``"\\n"``.
        max_chunk_size: Soft upper bound on the buffer length, in characters.

    Returns:
        List of chunk strings, in document order. Empty for blank input.
    """
    paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
    chunks, current = [], ""
    for para in paragraphs:
        # ``current`` carries a trailing space, so it counts toward the limit.
        if len(current) + len(para) <= max_chunk_size:
            current += para + " "
        else:
            if current:
                chunks.append(current.strip())
            current = para + " "
    if current.strip():
        chunks.append(current.strip())
    return chunks


all_chunks, chunk_metadata = [], []
for doc_name, doc_text in documents.items():
    for i, chunk in enumerate(semantic_chunk(doc_text)):
        all_chunks.append(chunk)
        chunk_metadata.append({"doc_name": doc_name, "chunk_id": f"{doc_name}_chunk_{i}"})

# ChromaDB
chroma_client = chromadb.Client()
# get_or_create + upsert keep this section safe to execute more than once in
# the same process (e.g. on a hot reload), where create_collection/add would
# fail on the already existing collection or ids.
collection = chroma_client.get_or_create_collection(
    name="telconet",
    metadata={"hnsw:space": "cosine"},
)
embeddings = []
if all_chunks:  # nothing to index if every document was empty
    embeddings = embed_model.encode(all_chunks).tolist()
    collection.upsert(
        ids=[m["chunk_id"] for m in chunk_metadata],
        documents=all_chunks,
        embeddings=embeddings,
        metadatas=chunk_metadata,
    )


# ============================================================
# REBUILD GRAPH
# ============================================================
def normalize_name(name):
    """Return the lookup key for an entity name.

    The name is lower-cased, stripped, and internal whitespace runs are
    collapsed to one space. (The previous ``replace(" ", " ")`` was a no-op.)
    """
    return " ".join(name.lower().split())


# normalized name -> entity record from graph_data["entities"]
entity_map = {}
for entity in graph_data["entities"]:
    entity_map[normalize_name(entity["name"])] = entity

G = nx.Graph()
for key, info in entity_map.items():
    G.add_node(key, label=info["name"], type=info.get("type", "UNKNOWN"))

for rel in graph_data["relationships"]:
    src = normalize_name(rel.get("source", ""))
    tgt = normalize_name(rel.get("target", ""))
    # Edges whose endpoints are not known entities are dropped.
    if src in entity_map and tgt in entity_map:
        G.add_edge(src, tgt, relation=rel.get("relation", ""))

# node key -> community id, restricted to nodes that exist in the graph
partition = {k: int(v) for k, v in graph_data.get("partition", {}).items() if k in G}
community_summaries = graph_data.get("community_summaries", {})

# community id -> set of node keys; avoids rescanning ``partition`` per query.
community_nodes = defaultdict(set)
for node_key, community_id in partition.items():
    community_nodes[community_id].add(node_key)

# Filled on first use by _get_summary_embeddings().
_summary_embeddings = {}


def _get_summary_embeddings():
    """Return ``{community id: summary embedding}``, encoding only once."""
    if community_summaries and not _summary_embeddings:
        ids = list(community_summaries)
        vectors = embed_model.encode([community_summaries[cid] for cid in ids])
        _summary_embeddings.update(zip(ids, vectors))
    return _summary_embeddings


def _cosine(a, b):
    """Cosine similarity of two embedding vectors; 0.0 if either has zero norm."""
    denom = float((a ** 2).sum() ** 0.5) * float((b ** 2).sum() ** 0.5)
    if not denom:
        return 0.0
    return float(a @ b) / denom


def _community_members(cid):
    """Return the node keys of community *cid*, or an empty tuple if unknown."""
    try:
        return community_nodes.get(int(cid), ())
    except (TypeError, ValueError):
        # Summary key that is not an integer id: it has no partition entry.
        return ()


# ============================================================
# QUERY FUNCTIONS
# ============================================================
def llm_generate(prompt, max_tokens=500):
    """Send *prompt* as a single user message to the LLM and return its reply.

    Args:
        prompt: Full prompt text.
        max_tokens: Generation limit passed to the chat completion call.

    Returns:
        The content of the first choice, or ``""`` if the content is empty.
    """
    resp = llm_client.chat_completion(
        model=LLM_MODEL,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=0.2,
    )
    return resp.choices[0].message.content or ""


def standard_rag_query(question, top_k=5):
    """Answer *question* from the ``top_k`` most similar document chunks.

    Args:
        question: User question.
        top_k: Number of chunks requested from the vector index.

    Returns:
        ``(answer, info)`` where ``info`` lists the source documents and the
        number of chunks actually placed in the prompt.
    """
    q_emb = embed_model.encode([question]).tolist()
    results = collection.query(query_embeddings=q_emb, n_results=top_k)
    docs = results["documents"][0]
    context = "\n\n---\n\n".join(docs)
    prompt = (
        "Aşağıdaki bağlam bilgilerini kullanarak soruyu Türkçe yanıtla. "
        "Sadece bağlamda bulunan bilgileri kullan.\n\n"
        f"Bağlam:\n{context}\n\n"
        f"Soru: {question}\n\n"
        "Yanıt:"
    )
    answer = llm_generate(prompt)
    # Sorted so the detail line is stable between runs.
    sources = sorted({m["doc_name"] for m in results["metadatas"][0]})
    # Report the real chunk count; the index may return fewer than top_k.
    return answer, f"Kaynak: {', '.join(sources)} | {len(docs)} chunk kullanıldı"


def graphrag_query(question, top_k_comm=3, top_k_chunks=3):
    """Answer *question* using community summaries, graph relations and chunks.

    The question is compared (cosine similarity) with every community
    summary; the nodes of the ``top_k_comm`` best communities plus their
    direct neighbours define which relationships go into the prompt (at most
    30), followed by the ``top_k_chunks`` most similar text chunks.

    Args:
        question: User question.
        top_k_comm: Number of communities to select.
        top_k_chunks: Number of text chunks requested from the vector index.

    Returns:
        ``(answer, info)`` where ``info`` names the selected communities and
        the number of relevant nodes and relationships found.
    """
    q_emb = embed_model.encode([question])[0]

    comm_scores = {
        cid: _cosine(q_emb, s_emb)
        for cid, s_emb in _get_summary_embeddings().items()
    }
    top_comms = sorted(comm_scores.items(), key=lambda x: -x[1])[:top_k_comm]

    relevant_nodes = set()
    for cid, _ in top_comms:
        for node in _community_members(cid):
            relevant_nodes.add(node)
            # One hop out, so relations crossing community borders are kept.
            relevant_nodes.update(G.neighbors(node))

    parts = [
        f"[Community {cid} - Skor: {score:.2f}]\n{community_summaries.get(str(cid), '')}"
        for cid, score in top_comms
    ]

    rels = []
    for u, v, data in G.edges(data=True):
        if u in relevant_nodes or v in relevant_nodes:
            u_name = entity_map.get(u, {}).get("name", u)
            v_name = entity_map.get(v, {}).get("name", v)
            rels.append(f"{u_name} → {data.get('relation', '?')} → {v_name}")
    if rels:
        # Cap the prompt size; ``info`` below still reports the full count.
        parts.append("\nİlişkiler:\n" + "\n".join(rels[:30]))

    # Reuse the question embedding instead of encoding it a second time.
    chunk_res = collection.query(query_embeddings=[q_emb.tolist()], n_results=top_k_chunks)
    for ch in chunk_res["documents"][0]:
        parts.append(f"\n[Metin]\n{ch}")

    # Joined outside the f-string: a backslash inside an f-string expression
    # is a syntax error before Python 3.12.
    context = "\n\n".join(parts)
    prompt = (
        "Bilgi grafiği ve bağlamı kullanarak soruyu Türkçe yanıtla. "
        "İlişkileri takip ederek multi-hop çıkarımlar yap.\n\n"
        f"Bağlam:\n{context}\n\n"
        f"Soru: {question}\n\n"
        "Yanıt:"
    )
    answer = llm_generate(prompt)
    info = (
        f"Community: {[c[0] for c in top_comms]} | "
        f"{len(relevant_nodes)} node, {len(rels)} ilişki"
    )
    return answer, info


# ============================================================
# GRAPH VISUALIZATION
# ============================================================
def build_graph_figure():
    """Build the Plotly figure of the knowledge graph.

    Nodes are laid out with a seeded spring layout, coloured per community
    and sized by degree (clamped to 8..35). Nodes without a partition entry
    are drawn as community 0 so the graph is still shown when the partition
    is missing or incomplete.

    Returns:
        A ``plotly.graph_objects.Figure``; an empty one if the graph has no nodes.
    """
    if G.number_of_nodes() == 0:
        return go.Figure()

    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)
    colors = [
        '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
        '#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9',
    ]

    # All edges go into one trace; ``None`` breaks the line between segments.
    edge_x, edge_y = [], []
    for u, v in G.edges():
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color='#ccc'),
        mode='lines',
        hoverinfo='none',
        showlegend=False,
    ))

    # Group every graph node by community, defaulting to 0 when unassigned.
    nodes_by_comm = defaultdict(list)
    for node in G.nodes():
        nodes_by_comm[partition.get(node, 0)].append(node)

    for cid in sorted(nodes_by_comm):
        nodes = nodes_by_comm[cid]
        fig.add_trace(go.Scatter(
            x=[pos[n][0] for n in nodes],
            y=[pos[n][1] for n in nodes],
            mode='markers+text',
            text=[entity_map.get(n, {}).get("name", n)[:18] for n in nodes],
            textposition="top center",
            textfont=dict(size=7),
            # Plotly renders "<br>" as a line break inside hover labels.
            hovertext=[
                f"{entity_map.get(n, {}).get('name', n)}"
                f"<br>Tür: {entity_map.get(n, {}).get('type', '?')}"
                f"<br>Bağlantı: {G.degree(n)}"
                for n in nodes
            ],
            hoverinfo='text',
            marker=dict(
                size=[max(8, min(35, G.degree(n) * 3)) for n in nodes],
                color=colors[cid % len(colors)],
                line=dict(width=1, color='white'),
            ),
            name=f'Community {cid}',
        ))

    fig.update_layout(
        title='🔌 Telgraf — TelcoNet Knowledge Graph',
        showlegend=True,
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='white',
        height=600,
    )
    return fig


# Built once at import; the layout is seeded, so rebuilding would not change it.
GRAPH_FIG = build_graph_figure()

# ============================================================
# GRADIO UI
# ============================================================
EXAMPLE_QUESTIONS = [
    "TelcoNet'in CEO'su kimdir?",
    "Bolu fiber kesintisinden hangi müşteriler etkilendi ve toplam tazminat ne kadar?",
    "CTO Elif Demir'e bağlı departman müdürleri kimler ve hangi projelerin sponsorluğunu yapıyor?",
    "Ericsson'un TelcoNet'teki tüm rolleri neler?",
    "DDoS saldırısına kim müdahale etti ve sonrasında hangi projeler başlatıldı?",
    "TelcoNet'in 2026 en büyük teknoloji riskleri ve başlatılan projeler nelerdir?",
]

# Markdown needs real line breaks for the headings and the table to render.
ABOUT_MARKDOWN = """
## 🔌 Telgraf Projesi

**GraphRAG vs Standart RAG** karşılaştırma demo'su.

**Kurgusal şirket:** TelcoNet A.Ş. (Türk telekom operatörü)

| Özellik | Standart RAG | GraphRAG |
|---|---|---|
| Basit sorular | ✅ İyi | ✅ İyi |
| Multi-hop | ❌ Zayıf | ✅ Güçlü |
| İlişki zinciri | ❌ Kuramıyor | ✅ Graph traversal |
| Global özet | ❌ Yetersiz | ✅ Community summaries |

**Tech:** Qwen2.5-7B, ChromaDB, NetworkX, Sentence-Transformers

**Geliştiren:** [Cihat Yıldız](https://huggingface.co/cihatyldz)
"""


def compare(question):
    """Run both pipelines on *question* for the comparison tab.

    Returns:
        ``(rag_answer, rag_info, graph_answer, graph_info)``. For a blank
        question no model is called and a short hint is returned instead.
    """
    if not question or not question.strip():
        hint = "Lütfen bir soru yazın."
        return hint, "", hint, ""
    rag_ans, rag_info = standard_rag_query(question)
    graph_ans, graph_info = graphrag_query(question)
    return rag_ans, rag_info, graph_ans, graph_info


def show_graph():
    """Return the prebuilt knowledge-graph figure."""
    return GRAPH_FIG


with gr.Blocks(title="🔌 Telgraf", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔌 Telgraf — GraphRAG vs Standart RAG")
    gr.Markdown("Telekom domain'inde Knowledge Graph tabanlı bilgi erişimi karşılaştırması")

    with gr.Tab("⚔️ Karşılaştır"):
        question = gr.Textbox(
            label="Sorunuzu yazın",
            placeholder="Örn: Bolu fiber kesintisinin etkileri neler?",
            lines=2,
        )
        gr.Examples(examples=[[q] for q in EXAMPLE_QUESTIONS], inputs=question)
        btn = gr.Button("🔍 Karşılaştır", variant="primary")
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📘 Standart RAG")
                rag_answer = gr.Textbox(label="Cevap", lines=10)
                rag_meta = gr.Textbox(label="Detay", lines=2)
            with gr.Column():
                gr.Markdown("### 📗 GraphRAG")
                graph_answer = gr.Textbox(label="Cevap", lines=10)
                graph_meta = gr.Textbox(label="Detay", lines=2)
        btn.click(
            compare,
            inputs=question,
            outputs=[rag_answer, rag_meta, graph_answer, graph_meta],
        )

    with gr.Tab("🕸️ Knowledge Graph"):
        gr.Markdown("### İnteraktif Bilgi Grafiği")
        graph_btn = gr.Button("📊 Grafiği Göster")
        graph_plot = gr.Plot()
        graph_btn.click(show_graph, outputs=graph_plot)

    with gr.Tab("📄 Dokümanlar"):
        for name, text in documents.items():
            with gr.Accordion(name, open=False):
                gr.Textbox(value=text, lines=15, interactive=False)

    with gr.Tab("ℹ️ Hakkında"):
        gr.Markdown(ABOUT_MARKDOWN)

# Start the server only when run as a script, so importing the module (for
# tests or tooling) does not block on a running web server.
if __name__ == "__main__":
    demo.launch()