Spaces:
Running
Running
File size: 3,324 Bytes
774ec97 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | from fastapi import APIRouter, UploadFile, File, HTTPException
import json
import tempfile
import os
from typing import List
import logging
from rag.core import RAGSystem
from config.models import Document
# Router grouping the document endpoints under the "/documents" prefix.
router = APIRouter(prefix="/documents", tags=["documents"])
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Global RAG system instance, created once at import time and used by the
# upload endpoints below.
rag_system = RAGSystem()
@router.post("/upload")
async def upload_document(file: UploadFile = File(...)):
    """Upload a text document to enrich the knowledge base.

    The uploaded file is read in full, decoded as UTF-8 and handed to
    ``rag_system.add_document`` together with basic metadata (filename,
    content type, size in bytes and an ISO-formatted upload timestamp).

    Args:
        file: The uploaded file taken from the request.

    Returns:
        A dict with the keys ``status``, ``filename`` and ``message``.

    Raises:
        HTTPException: With status 500 when reading, decoding or
            processing the document fails.
    """
    # Imported at function scope because the module-level imports do not
    # provide ``datetime``; without it the timestamp line raised NameError
    # and every upload ended in a 500 response.
    from datetime import datetime

    try:
        # Read the raw bytes and decode them as UTF-8 text.
        content = await file.read()
        text_content = content.decode('utf-8')

        # Basic metadata stored alongside the document.
        metadata = {
            "filename": file.filename,
            "content_type": file.content_type,
            "size_bytes": len(content),
            "upload_timestamp": datetime.now().isoformat(),
        }

        # Hand the document over to the RAG system.
        rag_system.add_document(text_content, metadata)
    except Exception as e:
        logger.error(f"Error subiendo documento: {e}")
        raise HTTPException(
            status_code=500, detail="Error procesando documento"
        ) from e

    return {
        "status": "success",
        "filename": file.filename,
        "message": "Documento procesado correctamente",
    }
@router.post("/upload-json")
async def upload_json_documents(documents: List[Document]):
    """Upload a batch of documents supplied in structured form.

    Each document's ``content`` and ``metadata`` are passed to
    ``rag_system.add_document`` in the order received.

    Args:
        documents: The documents to add.

    Returns:
        A dict with ``status``, ``processed_count`` and ``message`` keys.

    Raises:
        HTTPException: With status 500 when adding any document fails.
    """
    try:
        # Stays at 0 when the list is empty; otherwise tracks how many
        # documents have been added so far.
        processed_count = 0
        for processed_count, item in enumerate(documents, start=1):
            rag_system.add_document(item.content, item.metadata)
    except Exception as e:
        logger.error(f"Error subiendo documentos JSON: {e}")
        raise HTTPException(status_code=500, detail="Error procesando documentos")

    return {
        "status": "success",
        "processed_count": processed_count,
        "message": f"{processed_count} documentos procesados",
    }
@router.get("/search")
async def search_documents(query: str, top_k: int = 5):
    """Search the stored documents directly for a query string.

    The query is embedded with ``EmbeddingModel`` and looked up through
    ``VectorStoreFAISS.search_documents``. Each hit is reported with its
    1-based rank, a preview of at most 200 characters (followed by "..."
    when truncated), its metadata and a similarity computed as
    ``1 - distance`` (distance is taken as 0 when none are returned).

    Args:
        query: Text to search for.
        top_k: Number of results requested from the vector store.

    Returns:
        A dict with ``query``, ``results`` and ``count`` keys.

    Raises:
        HTTPException: With status 500 when any step of the search fails.
    """
    try:
        from rag.embeddings import EmbeddingModel
        from rag.retriever import VectorStoreFAISS

        encoder = EmbeddingModel()
        store = VectorStoreFAISS()

        query_vector = encoder.embed_text(query).tolist()
        hits = store.search_documents(query_vector, top_k=top_k)

        # Build the response entries from the first result group.
        matches = []
        if hits['documents']:
            paired = zip(hits['documents'][0], hits['metadatas'][0])
            for position, (text, meta) in enumerate(paired):
                if len(text) > 200:
                    preview = text[:200] + "..."
                else:
                    preview = text

                if hits['distances']:
                    distance = hits['distances'][0][position]
                else:
                    distance = 0

                matches.append({
                    "rank": position + 1,
                    "content_preview": preview,
                    "metadata": meta,
                    "similarity": 1 - distance,
                })

        return {
            "query": query,
            "results": matches,
            "count": len(matches),
        }
    except Exception as e:
        logger.error(f"Error buscando documentos: {e}")
        raise HTTPException(status_code=500, detail="Error buscando documentos")