Spaces:

Madras1
/

AetherMap

Sleeping

App Files Community

Madras1 committed 29 days ago

Commit

d7ca959

verified ·

1 Parent(s): abfbafc

Upload app.py

Browse files

Files changed (1) hide show

app.py +30 -3

app.py CHANGED Viewed

@@ -979,24 +979,47 @@ async def analyze_graph_api(job_id: str = Form(...)):
         cached_data = cache[job_id]
         df = cached_data["df"]
         textos = df["full_text"].tolist()
         # Extrair entidades e construir grafo
         entities_by_doc = extract_entities(textos)
         entity_network = build_entity_to_entity_graph(entities_by_doc)
         # Preparar resumo do grafo para o LLM
         nodes = entity_network.get("nodes", [])[:15]
         edges = entity_network.get("edges", [])[:20]
         hubs = entity_network.get("hubs", [])[:5]
         insights = entity_network.get("insights", {})
         graph_summary = f"""
-ANÁLISE DE KNOWLEDGE GRAPH
 ## Visão Geral
 - {len(nodes)} entidades principais
 - {insights.get('total_connections', 0)} conexões totais
 - {insights.get('hub_count', 0)} hubs identificados
 ## Entidades Principais (por importância):
 {chr(10).join([f"- {n['entity']} ({n['type']}): {n['docs']} docs, centralidade {n.get('centrality', 0)}" for n in nodes])}
@@ -1006,6 +1029,9 @@ ANÁLISE DE KNOWLEDGE GRAPH
 ## Conexões Mais Fortes:
 {chr(10).join([f"- {e['source_entity']} ↔ {e['target_entity']}: {e['weight']} co-ocorrências" for e in edges[:10]])}
 """
         # Prompt para análise
@@ -1019,10 +1045,11 @@ Por favor forneça:
 1. **Narrativa Central**: Qual é a história principal que conecta estas entidades?
 2. **Atores Chave**: Quem são os principais players e qual seu papel?
 3. **Relações Ocultas**: Que conexões não-óbvias você identifica?
-4. **Padrões**: Algum padrão interessante nos tipos de entidades ou conexões?
 5. **Investigação**: O que valeria a pena investigar mais a fundo?
-Responda de forma concisa e acionável, como um briefing de inteligência."""
         # Chamar LLM
         completion = groq_client.chat.completions.create(

         cached_data = cache[job_id]
         df = cached_data["df"]
         textos = df["full_text"].tolist()
+        clusters = df["cluster"].tolist() if "cluster" in df.columns else ["0"] * len(textos)
         # Extrair entidades e construir grafo
         entities_by_doc = extract_entities(textos)
         entity_network = build_entity_to_entity_graph(entities_by_doc)
+        # Mapear entidades por cluster
+        entity_clusters = defaultdict(lambda: defaultdict(int))
+        for doc_idx, (entities, cluster) in enumerate(zip(entities_by_doc, clusters)):
+            for ent_text, ent_type in entities:
+                entity_clusters[ent_text][str(cluster)] += 1
         # Preparar resumo do grafo para o LLM
         nodes = entity_network.get("nodes", [])[:15]
         edges = entity_network.get("edges", [])[:20]
         hubs = entity_network.get("hubs", [])[:5]
         insights = entity_network.get("insights", {})
+        # Criar contexto de clusters
+        cluster_context = []
+        unique_clusters = sorted(set(str(c) for c in clusters if str(c) != "-1"))
+        for cluster_id in unique_clusters[:5]:
+            cluster_docs = [textos[i][:200] for i, c in enumerate(clusters) if str(c) == cluster_id][:3]
+            cluster_entities = [(ent, entity_clusters[ent].get(cluster_id, 0))
+                               for ent in entity_clusters if entity_clusters[ent].get(cluster_id, 0) > 0]
+            cluster_entities.sort(key=lambda x: x[1], reverse=True)
+            cluster_context.append(f"""
+### Cluster {cluster_id} ({len([c for c in clusters if str(c) == cluster_id])} docs)
+Entidades principais: {', '.join([f"{e[0]}({e[1]})" for e in cluster_entities[:5]])}
+Exemplo de documento: "{cluster_docs[0][:150]}..."
+""")
         graph_summary = f"""
+ANÁLISE DE KNOWLEDGE GRAPH COM CONTEXTO
 ## Visão Geral
 - {len(nodes)} entidades principais
 - {insights.get('total_connections', 0)} conexões totais
 - {insights.get('hub_count', 0)} hubs identificados
+- {len(unique_clusters)} clusters de documentos
 ## Entidades Principais (por importância):
 {chr(10).join([f"- {n['entity']} ({n['type']}): {n['docs']} docs, centralidade {n.get('centrality', 0)}" for n in nodes])}
 ## Conexões Mais Fortes:
 {chr(10).join([f"- {e['source_entity']} ↔ {e['target_entity']}: {e['weight']} co-ocorrências" for e in edges[:10]])}
+## CONTEXTO POR CLUSTER (IMPORTANTE - USE ESTAS REFERÊNCIAS):
+{chr(10).join(cluster_context)}
 """
         # Prompt para análise
 1. **Narrativa Central**: Qual é a história principal que conecta estas entidades?
 2. **Atores Chave**: Quem são os principais players e qual seu papel?
 3. **Relações Ocultas**: Que conexões não-óbvias você identifica?
+4. **Padrões por Cluster**: Como as entidades se distribuem entre os clusters? Qual cluster tem foco diferente?
 5. **Investigação**: O que valeria a pena investigar mais a fundo?
+IMPORTANTE: Sempre referencie os clusters específicos (ex: "No Cluster 0...", "Já no Cluster 1...").
+Use os dados concretos fornecidos, não generalize. Seja específico citando entidades e clusters."""
         # Chamar LLM
         completion = groq_client.chat.completions.create(