Spaces:

Danielfonseca1212
/

Graphknowledge

Sleeping

App Files Files Community

Graphknowledge / graph_agent.py

Danielfonseca1212

Update graph_agent.py

2744df3 verified 2 months ago

raw

history blame contribute delete

5.56 kB

	# graph_agent.py — GraphRAG Agent: GPT-4o-mini + Neo4j Cypher
	from openai import OpenAI
	import re

	# System prompt for the text-to-Cypher step. GraphRAGAgent.gerar_cypher sends
	# it as the "system" message, followed by the user's question. The text lists
	# the available projects, the graph schema (node labels, relationship types,
	# properties) and three few-shot examples, and tells the model to reply with
	# Cypher only, wrapped in a ```cypher fenced block.
	# The text is runtime data read by the model; it is intentionally left in
	# Portuguese and must not be reformatted.
	SYSTEM_PROMPT = """Você é um agente especialista em Graph Neural Networks para detecção de fraude.
	Você tem acesso a uma base de conhecimento em grafo Neo4j com 5 projetos de GNN.

	PROJETOS DISPONÍVEIS:
	1. Sistema Imune Digital — Deep RL (DQN Dueling), 3 agentes especialistas
	2. HetGNN Fraud — Grafo heterogêneo, HGTConv, 5 tipos de nó
	3. TGN Fraud Detection — Temporal GNN, memória GRU, stream e-commerce
	4. DOMINANT — Anomaly detection sem labels (IJCAI 2019)
	5. GraphSAGE Elliptic — Dataset real Bitcoin, inductive learning

	SCHEMA DO GRAFO:
	Nós: Projeto, Tecnologia, Conceito, Paper, Metrica
	Arestas:
	(Projeto)-[:USA]->(Tecnologia)
	(Projeto)-[:IMPLEMENTA]->(Conceito)
	(Projeto)-[:REFERENCIA]->(Paper)
	(Projeto)-[:TEM_METRICA]->(Metrica)
	(Projeto)-[:DIFERENTE_DE]->(Projeto)

	PROPRIEDADES:
	Projeto: nome, descricao, paradigma, dado, url, emoji, ano
	Tecnologia: nome
	Conceito: nome, descricao
	Paper: titulo, autores, venue, modelo
	Metrica: projeto, tipo, valor, dataset

	Sua tarefa:
	1. Gerar uma query Cypher para buscar informação relevante no grafo
	2. A query deve ser eficiente e específica à pergunta
	3. Retornar APENAS o Cypher, sem explicação, dentro de ```cypher ... ```

	Exemplos:
	Pergunta: "Quais projetos usam PyTorch Geometric?"
	```cypher
	MATCH (p:Projeto)-[:USA]->(t:Tecnologia {nome: 'PyTorch Geometric'})
	RETURN p.nome, p.descricao, p.url
	```

	Pergunta: "Qual projeto tem maior AUC?"
	```cypher
	MATCH (p:Projeto)-[:TEM_METRICA]->(m:Metrica {tipo: 'AUC'})
	RETURN p.nome, m.valor, m.dataset
	ORDER BY m.valor DESC
	```

	Pergunta: "Me explique o conceito de Inductive Learning"
	```cypher
	MATCH (c:Conceito {nome: 'Inductive Learning'})<-[:IMPLEMENTA]-(p:Projeto)
	RETURN c.nome, c.descricao, collect(p.nome) AS projetos
	```"""

	# Template for the final-answer step. GraphRAGAgent.gerar_resposta fills it
	# with str.format(context=..., question=...), so {context} and {question} are
	# the only placeholders; any literal brace added to this text would have to
	# be doubled ({{ }}) to survive formatting.
	# The text is runtime data read by the model and is intentionally left in
	# Portuguese.
	ANSWER_PROMPT = """Você é Daniel Fonseca, ML Engineer especialista em Graph Neural Networks para detecção de fraude.
	Responda de forma técnica, clara e entusiasmada sobre seus projetos.

	Contexto do grafo Neo4j:
	{context}

	Pergunta do usuário: {question}

	Instruções:
	- Responda em português
	- Seja específico e técnico
	- Cite os projetos relevantes com seus emojis
	- Se tiver URL de projeto, mencione que pode ser acessado no Hugging Face
	- Máximo 4 parágrafos
	- Finalize com uma frase que convide o usuário a explorar mais"""


	class GraphRAGAgent:
	    """Question-answering agent backed by a Neo4j graph and an OpenAI chat model.

	    The pipeline, driven by ``responder``, is: ask the model for a Cypher
	    query, run that query on Neo4j, flatten the returned rows into text, and
	    ask the model again for the final answer using that text as context.
	    """

	    # Fenced-block extractors, compiled once for the class. DOTALL lets ``.``
	    # cross the line breaks of a multi-line query, and the lazy ``.*?`` stops
	    # at the first closing fence instead of swallowing later blocks.
	    _CYPHER_FENCE = re.compile(r"```cypher\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)
	    _ANY_FENCE = re.compile(r"```\s*(.*?)\s*```", re.DOTALL)

	    def __init__(self, openai_api_key: str, neo4j_driver, neo4j_database: str):
	        """Store the OpenAI client, the Neo4j driver and the target database.

	        Args:
	            openai_api_key: API key passed to the ``OpenAI`` client.
	            neo4j_driver: Driver object exposing ``session(database=...)``.
	            neo4j_database: Database name used for every session.
	        """
	        self.client = OpenAI(api_key=openai_api_key)
	        self.driver = neo4j_driver
	        self.database = neo4j_database
	        self.model = "gpt-4o-mini"

	    @staticmethod
	    def _extrair_cypher(texto) -> str:
	        """Return the Cypher contained in a model reply.

	        Prefers a ```cypher fenced block, then any fenced block, and finally
	        falls back to the whole reply. A missing reply (``None``) yields "".
	        """
	        texto = texto or ""
	        for padrao in (GraphRAGAgent._CYPHER_FENCE, GraphRAGAgent._ANY_FENCE):
	            achado = padrao.search(texto)
	            if achado:
	                return achado.group(1).strip()
	        return texto.strip()

	    def gerar_cypher(self, pergunta: str) -> str:
	        """Ask the chat model to translate a natural-language question into Cypher.

	        Args:
	            pergunta: The user's question.

	        Returns:
	            The query text with any Markdown code fence removed.
	        """
	        resp = self.client.chat.completions.create(
	            model=self.model,
	            messages=[
	                {"role": "system", "content": SYSTEM_PROMPT},
	                {"role": "user", "content": pergunta},
	            ],
	            temperature=0.1,
	            max_tokens=300,
	        )
	        return self._extrair_cypher(resp.choices[0].message.content)

	    def executar_cypher(self, cypher: str) -> list:
	        """Run a Cypher query on Neo4j and return the rows as dictionaries.

	        Failures are deliberately not raised: they are reported as a single
	        ``{"erro": message}`` row, which ``formatar_contexto`` recognises, so
	        the pipeline can still produce an answer.

	        NOTE(review): the query text comes from the model and is executed
	        verbatim; nothing in this method restricts it to read-only statements.
	        """
	        try:
	            with self.driver.session(database=self.database) as session:
	                result = session.run(cypher)
	                # Materialise inside the ``with`` so rows are read before
	                # the session is closed.
	                return [dict(record) for record in result]
	        except Exception as e:  # best-effort boundary: surface the error as data
	            return [{"erro": str(e)}]

	    def formatar_contexto(self, resultados: list, max_resultados: int = 10) -> str:
	        """Flatten query rows into plain text for the answer prompt.

	        Args:
	            resultados: Rows as returned by ``executar_cypher``.
	            max_resultados: Maximum number of rows included (default 10).

	        Returns:
	            One line per row in the form ``key: value | key: value`` with
	            ``None`` values omitted; a fixed message when there are no rows;
	            or the error text when the only row is an ``{"erro": ...}`` marker.
	        """
	        if not resultados:
	            return "Nenhum resultado encontrado no grafo."
	        if len(resultados) == 1 and "erro" in resultados[0]:
	            return f"Erro na query: {resultados[0]['erro']}"
	        # Plain " | " separator: the previous "\|" was an invalid escape
	        # sequence and leaked a literal backslash into the context.
	        return "\n".join(
	            " | ".join(f"{k}: {v}" for k, v in r.items() if v is not None)
	            for r in resultados[:max_resultados]
	        )

	    def gerar_resposta(self, pergunta: str, contexto: str) -> str:
	        """Ask the chat model for the final answer, grounded on the graph context.

	        Args:
	            pergunta: The user's question.
	            contexto: Text produced by ``formatar_contexto``.

	        Returns:
	            The model's answer, or "" when the reply carries no content.
	        """
	        prompt = ANSWER_PROMPT.format(context=contexto, question=pergunta)
	        resp = self.client.chat.completions.create(
	            model=self.model,
	            messages=[{"role": "user", "content": prompt}],
	            temperature=0.7,
	            max_tokens=600,
	        )
	        return resp.choices[0].message.content or ""

	    def responder(self, pergunta: str) -> dict:
	        """Run the full pipeline for one question.

	        Steps: generate Cypher, execute it on Neo4j, format the rows as
	        context, then generate the answer.

	        Returns:
	            A dict with the keys ``cypher``, ``resultados``, ``contexto`` and
	            ``resposta``, exposing every intermediate stage.
	        """
	        cypher = self.gerar_cypher(pergunta)
	        resultados = self.executar_cypher(cypher)
	        contexto = self.formatar_contexto(resultados)
	        resposta = self.gerar_resposta(pergunta, contexto)

	        return {
	            "cypher": cypher,
	            "resultados": resultados,
	            "contexto": contexto,
	            "resposta": resposta,
	        }