|
|
| import gradio as gr |
| import json |
| import os |
| import re |
| import time |
| from collections import defaultdict |
|
|
| import networkx as nx |
| from community.community_louvain import best_partition |
| import plotly.graph_objects as go |
| import chromadb |
| from sentence_transformers import SentenceTransformer |
| from huggingface_hub import InferenceClient, hf_hub_download |
|
|
| |
| |
| |
# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Chat model id passed to the inference client for every completion request.
LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# Sentence-transformers model id used to build `embed_model` below.
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
# Hub dataset repository that holds the text documents and graph_data.json.
DATASET_REPO = "cihatyldz/telgraf-telconet-dataset"


# Shared clients, created once at import time and reused by all query functions.
llm_client = InferenceClient(token=HF_TOKEN)
embed_model = SentenceTransformer(EMBEDDING_MODEL)
|
|
| |
| |
| |
# Text documents to fetch from the dataset repository (paths inside the repo).
DOC_FILES = [
    "data/01_kurumsal_yapi.txt",
    "data/02_ag_altyapisi.txt",
    "data/03_ariza_kayitlari.txt",
    "data/04_sla_musteri.txt",
    "data/05_teknoloji_yol_haritasi.txt"
]


def _download_from_dataset(filename):
    """Download one file of DATASET_REPO and return the local path reported by the hub client."""
    return hf_hub_download(
        repo_id=DATASET_REPO,
        filename=filename,
        repo_type="dataset",
        token=HF_TOKEN,
    )


# Map "<file name without .txt>" -> full document text.
documents = {}
for repo_file in DOC_FILES:
    local_file = _download_from_dataset(repo_file)
    doc_key = repo_file.split("/")[-1].replace(".txt", "")
    with open(local_file, "r", encoding="utf-8") as handle:
        documents[doc_key] = handle.read()


# Pre-built knowledge-graph payload (entities, relationships, partition, summaries).
graph_path = _download_from_dataset("graph_data.json")
with open(graph_path, "r", encoding="utf-8") as handle:
    graph_data = json.load(handle)


print(f"Loaded {len(documents)} documents, {len(graph_data['entities'])} entities")
|
|
| |
| |
| |
def semantic_chunk(text, max_chunk_size=600):
    """Greedily pack the non-empty lines of *text* into space-joined chunks.

    Each line is stripped and treated as one paragraph. A paragraph is added
    to the current chunk while the chunk's accumulated length (every packed
    paragraph plus one separator character each) plus the new paragraph's
    length stays within ``max_chunk_size``; otherwise the current chunk is
    closed and the paragraph starts a new one. A single paragraph longer than
    ``max_chunk_size`` is kept whole as its own chunk.

    Returns the list of chunk strings in document order.
    """
    chunks = []
    pending = []        # paragraphs of the chunk under construction
    pending_len = 0     # sum of len(paragraph) + 1 over `pending`

    for raw_line in text.split("\n"):
        para = raw_line.strip()
        if not para:
            continue
        if pending_len + len(para) > max_chunk_size:
            # Paragraph does not fit: flush what we have and start over.
            if pending:
                chunks.append(" ".join(pending))
            pending = [para]
            pending_len = len(para) + 1
        else:
            pending.append(para)
            pending_len += len(para) + 1

    if pending:
        chunks.append(" ".join(pending))
    return chunks
|
|
# Flatten every document into parallel lists: chunk text and its metadata.
all_chunks, chunk_metadata = [], []
for doc_name, doc_text in documents.items():
    pieces = semantic_chunk(doc_text)
    all_chunks.extend(pieces)
    chunk_metadata.extend(
        {"doc_name": doc_name, "chunk_id": f"{doc_name}_chunk_{idx}"}
        for idx in range(len(pieces))
    )


# Index all chunks in a Chroma collection configured for cosine distance.
chroma_client = chromadb.Client()
collection = chroma_client.create_collection(
    name="telconet",
    metadata={"hnsw:space": "cosine"},
)
embeddings = embed_model.encode(all_chunks).tolist()
chunk_ids = [meta["chunk_id"] for meta in chunk_metadata]
collection.add(
    ids=chunk_ids,
    documents=all_chunks,
    embeddings=embeddings,
    metadatas=chunk_metadata,
)
|
|
| |
| |
| |
def normalize_name(name):
    """Return the lookup key for an entity name.

    The name is lower-cased, leading/trailing whitespace is removed and every
    internal run of whitespace is collapsed to a single space, so names that
    differ only in spacing or letter case map to the same key.
    """
    # str.split() with no argument drops leading/trailing whitespace and
    # splits on whitespace runs, which makes the join below collapse them.
    return " ".join(name.lower().split())
|
|
# Normalized entity name -> raw entity record (a later duplicate replaces an earlier one).
entity_map = {
    normalize_name(entity["name"]): entity
    for entity in graph_data["entities"]
}


# Undirected graph: one node per entity, keyed by the normalized name.
G = nx.Graph()
G.add_nodes_from(
    (node_key, {"label": record["name"], "type": record.get("type", "UNKNOWN")})
    for node_key, record in entity_map.items()
)


# Only keep relationships whose two endpoints are both known entities.
for rel in graph_data["relationships"]:
    source_key = normalize_name(rel.get("source", ""))
    target_key = normalize_name(rel.get("target", ""))
    if source_key not in entity_map or target_key not in entity_map:
        continue
    G.add_edge(source_key, target_key, relation=rel.get("relation", ""))


# Community id per node, restricted to nodes that exist in the graph.
partition = {
    node_key: int(community_id)
    for node_key, community_id in graph_data.get("partition", {}).items()
    if node_key in G
}
community_summaries = graph_data.get("community_summaries", {})
|
|
| |
| |
| |
def llm_generate(prompt, max_tokens=500):
    """Send *prompt* as a single user message to LLM_MODEL and return the reply text.

    ``max_tokens`` is forwarded to the chat-completion call; the temperature
    is fixed at 0.2. The content of the first returned choice is returned.
    """
    conversation = [{"role": "user", "content": prompt}]
    completion = llm_client.chat_completion(
        messages=conversation,
        model=LLM_MODEL,
        temperature=0.2,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
|
|
def standard_rag_query(question, top_k=5):
    """Answer *question* with plain vector-search RAG.

    The question is embedded, the ``top_k`` nearest chunks are fetched from
    the Chroma collection, and the LLM is prompted with those chunks as
    context.

    Returns:
        ``(answer, info)`` where ``info`` lists the source documents (in
        retrieval order, without duplicates) and the number of chunks that
        were actually retrieved.
    """
    query_embedding = embed_model.encode([question]).tolist()
    results = collection.query(query_embeddings=query_embedding, n_results=top_k)
    retrieved_chunks = results["documents"][0]
    context = "\n\n---\n\n".join(retrieved_chunks)
    prompt = f"""Aşağıdaki bağlam bilgilerini kullanarak soruyu Türkçe yanıtla.
Sadece bağlamda bulunan bilgileri kullan.

Bağlam:
{context}

Soru: {question}

Yanıt:"""
    answer = llm_generate(prompt)
    # dict.fromkeys de-duplicates while keeping retrieval order, so the info
    # line is stable between runs (a plain set has no defined order).
    sources = list(dict.fromkeys(meta["doc_name"] for meta in results["metadatas"][0]))
    # Report the real number of chunks used, which can be lower than top_k.
    return answer, f"Kaynak: {', '.join(sources)} | {len(retrieved_chunks)} chunk kullanıldı"
|
|
# Community-summary embeddings keyed by summary text; filled lazily so each
# summary is embedded once instead of on every query.
_SUMMARY_EMBEDDING_CACHE = {}


def _summary_embeddings(summaries):
    """Return the text -> embedding cache after embedding any not-yet-seen summaries in one batch."""
    missing = [text for text in dict.fromkeys(summaries) if text not in _SUMMARY_EMBEDDING_CACHE]
    if missing:
        for text, vector in zip(missing, embed_model.encode(missing)):
            _SUMMARY_EMBEDDING_CACHE[text] = vector
    return _SUMMARY_EMBEDDING_CACHE


def _cosine_similarity(a, b):
    """Cosine similarity of two vectors; 0.0 when either has zero norm."""
    denom = float((a**2).sum() ** 0.5) * float((b**2).sum() ** 0.5)
    return float(a @ b) / denom if denom else 0.0


def graphrag_query(question, top_k_comm=3, top_k_chunks=3):
    """Answer *question* using community summaries, graph relations and text chunks.

    Steps:
      1. Rank community summaries by cosine similarity to the question and
         keep the ``top_k_comm`` best.
      2. Collect the nodes of those communities plus their direct neighbours.
      3. Build the context from the selected summaries, up to 30 relations
         touching the collected nodes, and the ``top_k_chunks`` nearest text
         chunks from the vector store.
      4. Ask the LLM to answer from that context.

    Returns:
        ``(answer, info)`` where ``info`` names the selected communities and
        the number of nodes and relations found.
    """
    q_emb = embed_model.encode([question])[0]

    # 1. Score every community summary against the question.
    summary_vectors = _summary_embeddings(community_summaries.values())
    comm_scores = {
        cid: _cosine_similarity(q_emb, summary_vectors[summary])
        for cid, summary in community_summaries.items()
    }
    top_comms = sorted(comm_scores.items(), key=lambda item: -item[1])[:top_k_comm]

    # 2. Members of the selected communities and their graph neighbours.
    # Summary keys are converted with int() because partition values are ints.
    selected_ids = {int(cid) for cid, _ in top_comms}
    relevant_nodes = set()
    for node, community in partition.items():
        if community in selected_ids:
            relevant_nodes.add(node)
            if node in G:
                relevant_nodes.update(G.neighbors(node))

    # 3a. Community summaries.
    parts = [
        f"[Community {cid} - Skor: {score:.2f}]\n{community_summaries.get(str(cid), '')}"
        for cid, score in top_comms
    ]

    # 3b. Relations with at least one endpoint among the relevant nodes.
    rels = []
    for u, v, data in G.edges(data=True):
        if u in relevant_nodes or v in relevant_nodes:
            source_name = entity_map.get(u, {}).get('name', u)
            target_name = entity_map.get(v, {}).get('name', v)
            rels.append(f"{source_name} → {data.get('relation','?')} → {target_name}")
    if rels:
        # Only the first 30 relations go into the prompt.
        parts.append("\nİlişkiler:\n" + "\n".join(rels[:30]))

    # 3c. Nearest text chunks; the question embedding from step 1 is reused.
    chunk_res = collection.query(query_embeddings=[q_emb.tolist()], n_results=top_k_chunks)
    for chunk_text in chunk_res["documents"][0]:
        parts.append(f"\n[Metin]\n{chunk_text}")

    # Joined outside the f-string: a backslash inside an f-string expression
    # is a SyntaxError on Python versions before 3.12.
    context = "\n\n".join(parts)
    prompt = f"""Bilgi grafiği ve bağlamı kullanarak soruyu Türkçe yanıtla. İlişkileri takip ederek multi-hop çıkarımlar yap.

Bağlam:
{context}

Soru: {question}

Yanıt:"""
    answer = llm_generate(prompt)
    info = f"Community: {[c[0] for c in top_comms]} | {len(relevant_nodes)} node, {len(rels)} ilişki"
    return answer, info
|
|
| |
| |
| |
def _node_trace(nodes, pos, color, name):
    """Build the scatter trace (markers, labels, hover text) for one group of nodes."""
    records = [entity_map.get(n, {}) for n in nodes]
    return go.Scatter(
        x=[pos[n][0] for n in nodes],
        y=[pos[n][1] for n in nodes],
        mode='markers+text',
        # Labels are truncated to 18 characters to limit overlap.
        text=[rec.get("name", n)[:18] for n, rec in zip(nodes, records)],
        textposition="top center",
        textfont=dict(size=7),
        hovertext=[
            f"<b>{rec.get('name', n)}</b><br>Tür: {rec.get('type', '?')}<br>Bağlantı: {G.degree(n)}"
            for n, rec in zip(nodes, records)
        ],
        hoverinfo='text',
        marker=dict(
            # Marker size grows with node degree, clamped to the range 8..35.
            size=[max(8, min(35, G.degree(n) * 3)) for n in nodes],
            color=color,
            line=dict(width=1, color='white'),
        ),
        name=name,
    )


def build_graph_figure():
    """Build the Plotly figure of the knowledge graph.

    Nodes are positioned with a seeded spring layout, drawn as one trace per
    community (colour cycles through a fixed palette), and edges are drawn as
    a single light-grey line trace. Nodes that have no entry in ``partition``
    are drawn in a separate grey "Unassigned" trace so they are not hidden
    while their edges remain visible.

    Returns an empty figure when the graph has no nodes.
    """
    if G.number_of_nodes() == 0:
        return go.Figure()
    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)
    colors = ['#FF6B6B','#4ECDC4','#45B7D1','#96CEB4','#FFEAA7','#DDA0DD','#98D8C8','#F7DC6F','#BB8FCE','#85C1E9']

    # One polyline for all edges; None breaks the line between segments.
    edge_x, edge_y = [], []
    for u, v in G.edges():
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#ccc'), mode='lines', hoverinfo='none', showlegend=False))

    # Group every graph node by community in one pass; None = no community.
    members = defaultdict(list)
    for node in G.nodes():
        members[partition.get(node)].append(node)
    unassigned = members.pop(None, [])

    for cid in sorted(members):
        fig.add_trace(_node_trace(members[cid], pos, colors[cid % len(colors)], f'Community {cid}'))
    if unassigned:
        fig.add_trace(_node_trace(unassigned, pos, '#B0B0B0', 'Unassigned'))

    fig.update_layout(title='🔌 Telgraf — TelcoNet Knowledge Graph', showlegend=True,
                      xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                      yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                      plot_bgcolor='white', height=600)
    return fig
|
|
# Figure is built once at import time; show_graph() returns this same object.
GRAPH_FIG = build_graph_figure()





# Sample questions offered as clickable examples under the question box.
EXAMPLE_QUESTIONS = [
    "TelcoNet'in CEO'su kimdir?",
    "Bolu fiber kesintisinden hangi müşteriler etkilendi ve toplam tazminat ne kadar?",
    "CTO Elif Demir'e bağlı departman müdürleri kimler ve hangi projelerin sponsorluğunu yapıyor?",
    "Ericsson'un TelcoNet'teki tüm rolleri neler?",
    "DDoS saldırısına kim müdahale etti ve sonrasında hangi projeler başlatıldı?",
    "TelcoNet'in 2026 en büyük teknoloji riskleri ve başlatılan projeler nelerdir?"
]
|
|
def compare(question):
    """Run the same question through standard RAG and GraphRAG.

    Returns:
        ``(rag_answer, rag_info, graph_answer, graph_info)`` in the order the
        UI outputs expect. For a missing or blank question no retrieval or
        LLM call is made; both answers are empty and both info fields carry a
        short hint asking the user to type a question.
    """
    cleaned = (question or "").strip()
    if not cleaned:
        # Skip two embedding lookups and two LLM calls for an empty query.
        hint = "Lütfen bir soru yazın."
        return "", hint, "", hint
    rag_ans, rag_info = standard_rag_query(cleaned)
    graph_ans, graph_info = graphrag_query(cleaned)
    return rag_ans, rag_info, graph_ans, graph_info
|
|
def show_graph():
    """Return the module-level GRAPH_FIG figure (used as the graph button callback)."""
    return GRAPH_FIG
|
|
# Gradio UI: four tabs (comparison, graph view, raw documents, about).
with gr.Blocks(title="🔌 Telgraf", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔌 Telgraf — GraphRAG vs Standart RAG")
    gr.Markdown("Telekom domain'inde Knowledge Graph tabanlı bilgi erişimi karşılaştırması")


    # Tab 1: ask one question and show both pipelines' answers side by side.
    with gr.Tab("⚔️ Karşılaştır"):
        question = gr.Textbox(label="Sorunuzu yazın", placeholder="Örn: Bolu fiber kesintisinin etkileri neler?", lines=2)
        gr.Examples(examples=[[q] for q in EXAMPLE_QUESTIONS], inputs=question)
        btn = gr.Button("🔍 Karşılaştır", variant="primary")


        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📘 Standart RAG")
                rag_answer = gr.Textbox(label="Cevap", lines=10)
                rag_meta = gr.Textbox(label="Detay", lines=2)
            with gr.Column():
                gr.Markdown("### 📗 GraphRAG")
                graph_answer = gr.Textbox(label="Cevap", lines=10)
                graph_meta = gr.Textbox(label="Detay", lines=2)


        # Output order must match the tuple returned by compare().
        btn.click(compare, inputs=question, outputs=[rag_answer, rag_meta, graph_answer, graph_meta])


    # Tab 2: show the prebuilt knowledge-graph figure on demand.
    with gr.Tab("🕸️ Knowledge Graph"):
        gr.Markdown("### İnteraktif Bilgi Grafiği")
        graph_btn = gr.Button("📊 Grafiği Göster")
        graph_plot = gr.Plot()
        graph_btn.click(show_graph, outputs=graph_plot)


    # Tab 3: one collapsed, read-only accordion per loaded document.
    with gr.Tab("📄 Dokümanlar"):
        for name, text in documents.items():
            with gr.Accordion(name, open=False):
                gr.Textbox(value=text, lines=15, interactive=False)


    # Tab 4: static project description.
    with gr.Tab("ℹ️ Hakkında"):
        gr.Markdown("""
## 🔌 Telgraf Projesi

**GraphRAG vs Standart RAG** karşılaştırma demo'su.

**Kurgusal şirket:** TelcoNet A.Ş. (Türk telekom operatörü)

| Özellik | Standart RAG | GraphRAG |
|---|---|---|
| Basit sorular | ✅ İyi | ✅ İyi |
| Multi-hop | ❌ Zayıf | ✅ Güçlü |
| İlişki zinciri | ❌ Kuramıyor | ✅ Graph traversal |
| Global özet | ❌ Yetersiz | ✅ Community summaries |

**Tech:** Qwen2.5-7B, ChromaDB, NetworkX, Sentence-Transformers

**Geliştiren:** [Cihat Yıldız](https://huggingface.co/cihatyldz)
""")


# Start the Gradio server when the module is executed.
demo.launch()
|
|