Spaces:
Running
Running
| from fastapi import APIRouter, UploadFile, File, HTTPException | |
| import json | |
| import tempfile | |
| import os | |
| from typing import List | |
| import logging | |
| from rag.core import RAGSystem | |
| from config.models import Document | |
| router = APIRouter(prefix="/documents", tags=["documents"]) | |
| logger = logging.getLogger(__name__) | |
| # Instancia global del sistema RAG | |
| rag_system = RAGSystem() | |
| async def upload_document(file: UploadFile = File(...)): | |
| """Subir documento para enriquecer la base de conocimientos""" | |
| try: | |
| # Leer contenido | |
| content = await file.read() | |
| text_content = content.decode('utf-8') | |
| # Extraer metadata básica | |
| metadata = { | |
| "filename": file.filename, | |
| "content_type": file.content_type, | |
| "size_bytes": len(content), | |
| "upload_timestamp": datetime.now().isoformat() | |
| } | |
| # Procesar documento | |
| rag_system.add_document(text_content, metadata) | |
| return { | |
| "status": "success", | |
| "filename": file.filename, | |
| "message": "Documento procesado correctamente" | |
| } | |
| except Exception as e: | |
| logger.error(f"Error subiendo documento: {e}") | |
| raise HTTPException(status_code=500, detail="Error procesando documento") | |
| async def upload_json_documents(documents: List[Document]): | |
| """Subir documentos en formato estructurado""" | |
| try: | |
| processed_count = 0 | |
| for doc in documents: | |
| rag_system.add_document(doc.content, doc.metadata) | |
| processed_count += 1 | |
| return { | |
| "status": "success", | |
| "processed_count": processed_count, | |
| "message": f"{processed_count} documentos procesados" | |
| } | |
| except Exception as e: | |
| logger.error(f"Error subiendo documentos JSON: {e}") | |
| raise HTTPException(status_code=500, detail="Error procesando documentos") | |
| async def search_documents(query: str, top_k: int = 5): | |
| """Buscar directamente en documentos""" | |
| try: | |
| from rag.embeddings import EmbeddingModel | |
| from rag.retriever import VectorStoreFAISS | |
| embedder = EmbeddingModel() | |
| vector_store = VectorStoreFAISS() | |
| query_embedding = embedder.embed_text(query).tolist() | |
| results = vector_store.search_documents(query_embedding, top_k=top_k) | |
| # Formatear resultados | |
| formatted_results = [] | |
| if results['documents']: | |
| for i, (doc, metadata) in enumerate(zip(results['documents'][0], results['metadatas'][0])): | |
| formatted_results.append({ | |
| "rank": i + 1, | |
| "content_preview": doc[:200] + "..." if len(doc) > 200 else doc, | |
| "metadata": metadata, | |
| "similarity": 1 - (results['distances'][0][i] if results['distances'] else 0) | |
| }) | |
| return { | |
| "query": query, | |
| "results": formatted_results, | |
| "count": len(formatted_results) | |
| } | |
| except Exception as e: | |
| logger.error(f"Error buscando documentos: {e}") | |
| raise HTTPException(status_code=500, detail="Error buscando documentos") |