Spaces:

Madras1
/

Numidium

Runtime error

App Files Files Community

Madras1 committed on Jan 21

Commit

b05991c

verified ·

1 Parent(s): 7c9fcf8

Upload 77 files

Browse files

Files changed (2) hide show

app/services/__pycache__/investigator_agent.cpython-311.pyc +0 -0
app/services/investigator_agent.py +45 -4

app/services/__pycache__/investigator_agent.cpython-311.pyc CHANGED Viewed

Binary files a/app/services/__pycache__/investigator_agent.cpython-311.pyc and b/app/services/__pycache__/investigator_agent.cpython-311.pyc differ

app/services/investigator_agent.py CHANGED Viewed

@@ -3,6 +3,7 @@ Investigator Agent - Autonomous Investigation with Tool Calling
 Uses Cerebras native tool calling for multi-source investigations
 """
 import json
 import httpx
 from typing import Optional, List, Dict, Any
 from dataclasses import dataclass, field
@@ -15,6 +16,32 @@ from app.services.brazil_apis import consultar_cnpj
 from app.models.entity import Entity, Relationship
 @dataclass
 class Finding:
     """A discovery made during investigation"""
@@ -180,7 +207,7 @@ TOOLS = [
 ]
-SYSTEM_PROMPT = """Você é um agente investigador autônomo do sistema NUMIDIUM/AVANGARD.
 Sua missão é investigar temas usando múltiplas fontes de dados:
 - NUMIDIUM: Grafo de conhecimento com entidades e relacionamentos
@@ -200,7 +227,8 @@ Sua missão é investigar temas usando múltiplas fontes de dados:
 - Seja metódico e siga pistas
 - Não invente informações - use apenas dados das ferramentas
 - Priorize qualidade sobre quantidade
-- Cite sempre as fontes"""
 class InvestigatorAgent:
@@ -462,9 +490,22 @@ class InvestigatorAgent:
         if not final_summary:
             final_summary = await self._generate_report(mission)
         return InvestigationResult(
             mission=mission,
-            findings=self.findings,
             entities_discovered=self.entities_discovered,
             connections_mapped=self.connections_mapped,
             report=final_summary,
@@ -502,7 +543,7 @@ Gere relatório estruturado com: Resumo Executivo, Descobertas, Entidades, Recom
             {"role": "user", "content": prompt}
         ])
-        return response["choices"][0]["message"]["content"]
 # Singleton

 Uses Cerebras native tool calling for multi-source investigations
 """
 import json
+import re
 import httpx
 from typing import Optional, List, Dict, Any
 from dataclasses import dataclass, field
 from app.models.entity import Entity, Relationship
def sanitize_text(text: str) -> str:
    """
    Strip leaked "thinking" artifacts from Qwen model output.

    Removes, in order:
      1. complete thinking blocks (``<think>...</think>`` and
         ``<|think|>...<|/think|>``), including their content;
      2. everything up to and including a dangling closing tag (a closing
         tag with no matching opener), since that prefix is reasoning too;
      3. a stray opening ``<think>`` marker (the text after it is kept);
      4. the double-vertical-line character U+2016 and any remaining
         ``<|...|>`` special tokens;
      5. runs of 3+ newlines (collapsed to 2) and runs of 2+ spaces
         (collapsed to 1), then leading/trailing whitespace.

    Args:
        text: Raw model output. Falsy values (``None``, ``""``) are
            returned unchanged.

    Returns:
        The cleaned text, or the original falsy value.
    """
    if not text:
        return text

    # Complete thinking blocks: drop the tags and everything between them.
    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    text = re.sub(r'<\|think\|>.*?<\|/think\|>', '', text, flags=re.DOTALL)

    # A closing tag that survived the step above has no opener in the text
    # (the opener may have been supplied by the prompt template), so
    # everything before the last such tag is leaked reasoning.
    for closing_tag in ('</think>', '<|/think|>'):
        if closing_tag in text:
            text = text.rpartition(closing_tag)[2]

    # A lone opening marker (e.g. truncated generation): drop only the
    # marker so that no content is discarded on a guess.
    text = text.replace('<think>', '')

    # U+2016 shows up in buggy output when thinking leaks through.
    text = text.replace('\u2016', '')

    # Any other leftover special tokens such as <|im_end|>.
    text = re.sub(r'<\|.*?\|>', '', text)

    # Normalise the whitespace left behind by the removals.
    text = re.sub(r'\n{3,}', '\n\n', text)
    text = re.sub(r' {2,}', ' ', text)
    return text.strip()
 @dataclass
 class Finding:
     """A discovery made during investigation"""
 ]
+SYSTEM_PROMPT = """Você é um agente investigador autônomo do sistema NUMIDIUM/AVANGARD. /no_think
 Sua missão é investigar temas usando múltiplas fontes de dados:
 - NUMIDIUM: Grafo de conhecimento com entidades e relacionamentos
 - Seja metódico e siga pistas
 - Não invente informações - use apenas dados das ferramentas
 - Priorize qualidade sobre quantidade
+- Cite sempre as fontes
+- NÃO use pensamento interno ou tags <think>. Responda diretamente."""
 class InvestigatorAgent:
         if not final_summary:
             final_summary = await self._generate_report(mission)
+        # Sanitize all text outputs to remove thinking artifacts
+        final_summary = sanitize_text(final_summary)
+        # Sanitize findings content
+        sanitized_findings = []
+        for f in self.findings:
+            sanitized_findings.append(Finding(
+                title=sanitize_text(f.title),
+                content=sanitize_text(f.content),
+                source=f.source,
+                timestamp=f.timestamp
+            ))
         return InvestigationResult(
             mission=mission,
+            findings=sanitized_findings,
             entities_discovered=self.entities_discovered,
             connections_mapped=self.connections_mapped,
             report=final_summary,
             {"role": "user", "content": prompt}
         ])
+        return sanitize_text(response["choices"][0]["message"]["content"])
 # Singleton