Spaces:

ecotecds
/

Chatbot-RAG-v4

Running

NoeMartinezSanchez

Deploy Chatbot RAG with TinyLlama

774ec97 2 months ago

3.32 kB

	import json
	import logging
	import os
	import tempfile
	from datetime import datetime
	from typing import List

	from fastapi import APIRouter, UploadFile, File, HTTPException

	from rag.core import RAGSystem
	from config.models import Document

	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)

	# Router that groups the endpoints below under the "/documents" prefix.
	router = APIRouter(prefix="/documents", tags=["documents"])

	# Global RAG system instance used by the upload endpoints in this module.
	rag_system = RAGSystem()

	@router.post("/upload")
	async def upload_document(file: UploadFile = File(...)):
	    """Upload a text document to enrich the knowledge base.

	    The uploaded bytes are decoded as UTF-8 and passed to
	    ``rag_system.add_document`` together with basic metadata: filename,
	    content type, size in bytes and an ISO-8601 upload timestamp taken
	    from ``datetime.now()``.

	    Args:
	        file: The uploaded file (required).

	    Returns:
	        A dict with the keys ``status``, ``filename`` and ``message``.

	    Raises:
	        HTTPException: 400 when the file content is not valid UTF-8;
	            500 when any other error occurs while processing it.
	    """
	    try:
	        # Read the raw bytes of the upload.
	        content = await file.read()

	        # A file that is not UTF-8 text is a client error, not a server one.
	        try:
	            text_content = content.decode('utf-8')
	        except UnicodeDecodeError as exc:
	            logger.warning("Documento no decodificable como UTF-8: %s", file.filename)
	            raise HTTPException(
	                status_code=400,
	                detail="El archivo debe estar codificado en UTF-8",
	            ) from exc

	        # Basic metadata stored alongside the document text.
	        metadata = {
	            "filename": file.filename,
	            "content_type": file.content_type,
	            "size_bytes": len(content),
	            "upload_timestamp": datetime.now().isoformat()
	        }

	        # Hand the document over to the RAG system.
	        rag_system.add_document(text_content, metadata)

	        return {
	            "status": "success",
	            "filename": file.filename,
	            "message": "Documento procesado correctamente"
	        }

	    except HTTPException:
	        # Let deliberate HTTP errors (the 400 above) through untouched
	        # instead of rewrapping them as a generic 500.
	        raise
	    except Exception as e:
	        # logger.exception keeps the traceback so the root cause is visible.
	        logger.exception("Error subiendo documento: %s", e)
	        raise HTTPException(status_code=500, detail="Error procesando documento") from e

	@router.post("/upload-json")
	async def upload_json_documents(documents: List[Document]):
	    """Upload several documents supplied in structured form.

	    Each item's ``content`` and ``metadata`` are passed to
	    ``rag_system.add_document`` in the order received.

	    Args:
	        documents: The documents to add.

	    Returns:
	        A dict with ``status``, ``processed_count`` and ``message``.

	    Raises:
	        HTTPException: 500 when any error occurs while adding a document.
	    """
	    try:
	        # Stays 0 when the list is empty; otherwise enumerate leaves it at
	        # the number of documents added.
	        processed_count = 0
	        for processed_count, entry in enumerate(documents, start=1):
	            rag_system.add_document(entry.content, entry.metadata)

	        summary = f"{processed_count} documentos procesados"
	        return {
	            "status": "success",
	            "processed_count": processed_count,
	            "message": summary
	        }

	    except Exception as e:
	        logger.error(f"Error subiendo documentos JSON: {e}")
	        raise HTTPException(status_code=500, detail="Error procesando documentos")

	@router.get("/search")
	async def search_documents(query: str, top_k: int = 5):
	    """Search the stored documents directly.

	    Embeds ``query`` with a freshly created ``EmbeddingModel``, queries a
	    freshly created ``VectorStoreFAISS`` and formats the hits.

	    Args:
	        query: Text to search for.
	        top_k: Value forwarded to the vector store as ``top_k``
	            (defaults to 5).

	    Returns:
	        A dict with the original ``query``, the list of formatted
	        ``results`` and their ``count``. Each result carries a 1-based
	        ``rank``, a ``content_preview`` (first 200 characters plus "..."
	        when the text is longer), the ``metadata`` and a ``similarity``
	        computed as ``1 - distance``.

	    Raises:
	        HTTPException: 500 when any error occurs during the search.
	    """
	    try:
	        # Imported here, at call time, as in the original implementation.
	        from rag.embeddings import EmbeddingModel
	        from rag.retriever import VectorStoreFAISS

	        embedder = EmbeddingModel()
	        vector_store = VectorStoreFAISS()

	        embedded_query = embedder.embed_text(query)
	        query_embedding = embedded_query.tolist()
	        results = vector_store.search_documents(query_embedding, top_k=top_k)

	        # Build the response entries; only index 0 of each result list is
	        # read. NOTE(review): presumably one entry per query - confirm
	        # against VectorStoreFAISS.search_documents.
	        formatted_results = []
	        if results['documents']:
	            pairs = zip(results['documents'][0], results['metadatas'][0])
	            for position, (text, meta) in enumerate(pairs):
	                # Truncate long texts to a 200-character preview.
	                if len(text) > 200:
	                    preview = text[:200] + "..."
	                else:
	                    preview = text

	                # With no distances available the distance counts as 0,
	                # which yields a similarity of 1.
	                if results['distances']:
	                    distance = results['distances'][0][position]
	                else:
	                    distance = 0

	                formatted_results.append({
	                    "rank": position + 1,
	                    "content_preview": preview,
	                    "metadata": meta,
	                    "similarity": 1 - distance
	                })

	        return {
	            "query": query,
	            "results": formatted_results,
	            "count": len(formatted_results)
	        }

	    except Exception as e:
	        logger.error(f"Error buscando documentos: {e}")
	        raise HTTPException(status_code=500, detail="Error buscando documentos")