| """
|
| AetherMap Routes - Document Mapping & Semantic Search
|
| Integrates with AetherMap API for document clustering, NER, and semantic search.
|
| """
|
| from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Depends
|
| from pydantic import BaseModel, Field
|
| from typing import Optional, List, Dict, Any
|
| from sqlalchemy.orm import Session
|
| import io
|
|
|
| from app.api.deps import get_scoped_db
|
| from app.services.aethermap_client import aethermap, ProcessResult, SearchResult, EntityGraphResult
|
|
|
|
|
# Router on which every AetherMap endpoint defined in this module is registered.
router = APIRouter()
|
|
|
|
|
|
|
|
|
|
|
|
|
class IndexDocumentsRequest(BaseModel):
    """Payload for indexing a list of raw text documents.

    Attributes:
        documents: Texts to index (required).
        fast_mode: Processing mode flag forwarded to the AetherMap client;
            the field description labels it fast (PCA) vs. precise (UMAP).
            Defaults to ``True``.
    """

    documents: List[str] = Field(
        ...,
        description="Lista de textos para indexar",
    )
    fast_mode: bool = Field(
        default=True,
        description="Modo rápido (PCA) ou preciso (UMAP)",
    )
|
|
|
|
|
class IndexEntitiesRequest(BaseModel):
    """Payload for indexing entities stored in the NUMIDIUM database.

    Attributes:
        entity_types: Optional list of entity types to restrict the query
            to; ``None`` (the default) applies no type filter.
        limit: Maximum number of entities fetched from the database.
            Defaults to 500.
        fast_mode: Processing mode flag forwarded to the AetherMap client.
            Defaults to ``True``.
    """

    entity_types: Optional[List[str]] = Field(
        None, description="Filtrar por tipos de entidade"
    )
    limit: int = Field(500, description="Limite de entidades")
    # The /index-entities handler reads ``request.fast_mode`` when it exists
    # and otherwise falls back to True. Declaring the field with that same
    # default keeps existing clients working while letting new ones pick
    # the mode explicitly.
    fast_mode: bool = Field(True, description="Modo rápido (PCA) ou preciso (UMAP)")
|
|
|
|
|
class SemanticSearchRequest(BaseModel):
    """Payload for a semantic search over the indexed documents.

    Attributes:
        query: Search text (required).
        turbo_mode: Flag forwarded to the AetherMap client's search call;
            described as the faster mode. Defaults to ``True``.
    """

    query: str = Field(
        ...,
        description="Termo de busca",
    )
    turbo_mode: bool = Field(
        default=True,
        description="Modo turbo (mais rápido)",
    )
|
|
|
|
|
class IndexResponse(BaseModel):
    """Summary returned by the indexing endpoints.

    The handlers in this module fill it field-for-field from the result
    object returned by ``aethermap.process_documents``.
    """

    # Identifier of the processing job reported by the AetherMap client.
    job_id: str
    num_documents: int
    num_clusters: int
    # Presumably the number of documents left unclustered (noise) -- confirm
    # against the AetherMap API.
    num_noise: int
    # Free-form dictionaries passed through from the client result; their
    # schema is not visible in this module.
    metrics: Dict[str, Any] = {}
    cluster_analysis: Dict[str, Any] = {}
|
|
|
|
|
class SearchResponse(BaseModel):
    """Result of a semantic search request.

    Filled by the ``/search`` handler from the ``summary`` and ``results``
    attributes of the AetherMap client's search result.
    """

    summary: str
    # List of result records as returned by the client; the per-record
    # schema is not visible in this module.
    results: List[Dict[str, Any]] = []
|
|
|
|
|
class EntityGraphResponse(BaseModel):
    """Result of named-entity extraction over the indexed documents.

    Filled by the ``/entities`` handler from the AetherMap client's
    entity-graph result.
    """

    # ``hubs`` and ``insights`` are passed through from the client result.
    hubs: List[Dict[str, Any]] = []
    insights: Dict[str, Any] = {}
    # The handler sets these to the lengths of the result's ``nodes`` and
    # ``edges`` collections; the full graph itself is not returned.
    node_count: int = 0
    edge_count: int = 0
|
|
|
|
|
class StatusResponse(BaseModel):
    """Status report returned by the ``/status`` endpoint."""

    # NOTE(review): the /status handler in this module always sends True here.
    connected: bool
    # Current job id held by the AetherMap client, or None when there is none.
    job_id: Optional[str] = None
    # NOTE(review): the /status handler in this module always sends 0 here.
    documents_indexed: int = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/status", response_model=StatusResponse)
async def get_status():
    """Report the AetherMap integration status.

    Only ``job_id`` reflects live state (the client's current job id).
    ``connected`` is always reported as ``True`` and ``documents_indexed``
    as ``0``; no connectivity check or document count is performed here.
    """
    active_job = aethermap.current_job_id
    status = StatusResponse(
        connected=True,
        job_id=active_job,
        documents_indexed=0,
    )
    return status
|
|
|
|
|
@router.post("/index", response_model=IndexResponse)
async def index_documents(request: IndexDocumentsRequest):
    """
    Index a list of documents for semantic search.

    The documents will be:
    - Embedded using sentence transformers
    - Clustered using HDBSCAN
    - Indexed in FAISS + BM25 for hybrid search

    Args:
        request: Texts to index plus the ``fast_mode`` flag.

    Returns:
        IndexResponse built from the AetherMap processing result.

    Raises:
        HTTPException: 400 when ``documents`` is empty; 500 when processing
            fails for any other reason (detail carries the error text).
    """
    try:
        if not request.documents:
            raise HTTPException(status_code=400, detail="Nenhum documento fornecido")

        result = await aethermap.process_documents(
            texts=request.documents,
            fast_mode=request.fast_mode,
        )

        return IndexResponse(
            job_id=result.job_id,
            num_documents=result.num_documents,
            num_clusters=result.num_clusters,
            num_noise=result.num_noise,
            metrics=result.metrics,
            cluster_analysis=result.cluster_analysis,
        )

    except HTTPException:
        # HTTPException is itself an Exception subclass: without this
        # pass-through the deliberate 400 above would be caught by the
        # generic handler below and reported to the client as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
|
@router.post("/index-entities", response_model=IndexResponse)
async def index_entities(
    request: IndexEntitiesRequest,
    db: Session = Depends(get_scoped_db),
):
    """
    Index entities from the NUMIDIUM database.

    Loads up to ``request.limit`` entities (optionally filtered by type),
    renders each one as ``"name (type)"`` -- followed by ``": "`` and the
    first 1000 characters of its description when it has one -- and sends
    the resulting texts to AetherMap for processing.

    Raises:
        HTTPException: 404 when the query returns no entities; 500 on any
            other failure (detail carries the error text).
    """
    # Imported at call time, as in the original handler.
    from app.models.entity import Entity

    def render(entity):
        """Build the text document that represents one entity."""
        label = f"{entity.name} ({entity.type})"
        if not entity.description:
            return label
        return f"{label}: {entity.description[:1000]}"

    try:
        entity_query = db.query(Entity)
        if request.entity_types:
            entity_query = entity_query.filter(Entity.type.in_(request.entity_types))
        rows = entity_query.limit(request.limit).all()

        if not rows:
            raise HTTPException(status_code=404, detail="Nenhuma entidade encontrada")

        documents = [render(row) for row in rows]

        # The request may or may not expose ``fast_mode``; default to True
        # when it does not.
        outcome = await aethermap.process_documents(
            texts=documents,
            fast_mode=getattr(request, 'fast_mode', True),
        )

        return IndexResponse(
            job_id=outcome.job_id,
            num_documents=outcome.num_documents,
            num_clusters=outcome.num_clusters,
            num_noise=outcome.num_noise,
            metrics=outcome.metrics,
            cluster_analysis=outcome.cluster_analysis,
        )

    except HTTPException:
        # Keep the 404 above intact instead of wrapping it as a 500.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
@router.post("/upload", response_model=IndexResponse)
async def upload_documents(
    file: UploadFile = File(...),
    fast_mode: bool = Form(True),
):
    """
    Upload a text file and index every non-empty line as one document.

    The upload is read fully into memory, decoded as UTF-8 (a leading
    byte-order mark is dropped, undecodable bytes are skipped), split into
    lines, and each line is stripped of surrounding whitespace.

    - TXT: One document per line
    - CSV: No column-aware parsing is performed; each non-empty line
      (header row included) is indexed as-is.

    Args:
        file: The uploaded file.
        fast_mode: Processing mode flag forwarded to the AetherMap client.

    Raises:
        HTTPException: 400 when the file yields no non-empty lines; 500 on
            any other failure (detail carries the error text).
    """
    try:
        content = await file.read()
        # "utf-8-sig" behaves like "utf-8" but also removes a leading BOM.
        # With plain "utf-8" the BOM survives as U+FEFF, which str.strip()
        # does not treat as whitespace, so it would end up glued to the
        # first document.
        text = content.decode('utf-8-sig', errors='ignore')

        documents = [line for line in map(str.strip, text.splitlines()) if line]

        if not documents:
            raise HTTPException(status_code=400, detail="Arquivo vazio ou sem texto válido")

        result = await aethermap.process_documents(
            texts=documents,
            fast_mode=fast_mode,
        )

        return IndexResponse(
            job_id=result.job_id,
            num_documents=result.num_documents,
            num_clusters=result.num_clusters,
            num_noise=result.num_noise,
            metrics=result.metrics,
            cluster_analysis=result.cluster_analysis,
        )

    except HTTPException:
        # Keep the 400 above intact instead of wrapping it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
|
@router.post("/search", response_model=SearchResponse)
async def semantic_search(request: SemanticSearchRequest):
    """
    Run a semantic search over the currently indexed documents.

    Described upstream as hybrid RAG (FAISS + BM25 + reranking + LLM) that
    returns a summary answering the query with citations.

    Raises:
        HTTPException: 400 when the AetherMap client has no current job
            (nothing indexed yet); 500 on any other failure.
    """
    try:
        has_index = bool(aethermap.current_job_id)
        if not has_index:
            raise HTTPException(
                status_code=400,
                detail="Nenhum documento indexado. Use /index primeiro.",
            )

        outcome = await aethermap.semantic_search(
            query=request.query,
            turbo_mode=request.turbo_mode,
        )
        payload = SearchResponse(summary=outcome.summary, results=outcome.results)
        return payload

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
@router.post("/entities", response_model=EntityGraphResponse)
async def extract_entities():
    """
    Extract named entities (NER) from the indexed documents.

    The response carries the client's ``hubs`` and ``insights`` plus the
    number of nodes and edges in the extracted graph; the graph itself is
    not returned.

    Raises:
        HTTPException: 400 when the AetherMap client has no current job
            (nothing indexed yet); 500 on any other failure.
    """
    try:
        if not aethermap.current_job_id:
            raise HTTPException(
                status_code=400,
                detail="Nenhum documento indexado. Use /index primeiro.",
            )

        graph = await aethermap.extract_entities()

        total_nodes = len(graph.nodes)
        total_edges = len(graph.edges)
        return EntityGraphResponse(
            hubs=graph.hubs,
            insights=graph.insights,
            node_count=total_nodes,
            edge_count=total_edges,
        )

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
@router.post("/analyze")
async def analyze_graph():
    """
    Analyze the entity graph using an LLM.

    Returns a dict with the ``analysis``, ``key_entities`` and
    ``relationships`` attributes of the AetherMap client's analysis result.

    Raises:
        HTTPException: 400 when the AetherMap client has no current job
            (nothing indexed yet); 500 on any other failure.
    """
    try:
        if not aethermap.current_job_id:
            raise HTTPException(
                status_code=400,
                detail="Nenhum documento indexado. Use /index primeiro.",
            )

        report = await aethermap.analyze_graph()

        # Copy the three reported attributes into the response, in this order.
        exported = ("analysis", "key_entities", "relationships")
        return {field: getattr(report, field) for field in exported}

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
@router.post("/describe-clusters")
async def describe_clusters():
    """
    Get LLM descriptions for each cluster found.

    Returns:
        Whatever ``aethermap.describe_clusters()`` returns, passed through
        unchanged.

    Raises:
        HTTPException: 400 when the AetherMap client has no current job
            (nothing indexed yet); 500 on any other failure (detail carries
            the error text).
    """
    try:
        if not aethermap.current_job_id:
            raise HTTPException(status_code=400, detail="Nenhum documento indexado. Use /index primeiro.")

        result = await aethermap.describe_clusters()

        return result

    except HTTPException:
        # HTTPException is itself an Exception subclass: without this
        # pass-through the deliberate 400 above would be caught by the
        # generic handler below and reported to the client as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|