# RAG Admin Panel - PDF Upload Management (Port 9000)
import os
from pathlib import Path
from contextlib import asynccontextmanager
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.requests import Request
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
# Import RAG module
from app import rag
# True when the SPACE_ID environment variable is set to a non-empty value
# (presumably only inside a Hugging Face Space — confirm).
IS_HF_SPACE = bool(os.environ.get("SPACE_ID"))


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Prepare the RAG index before the admin panel starts serving.

    When not running in a Space, ``rag.load_vector_store()`` is tried first;
    if it reports failure, ``rag.rebuild_vector_store_from_pdfs()`` is called.
    No work is done on shutdown.
    """
    outside_space = not IS_HF_SPACE
    # Short-circuit: the store is only loaded (and possibly rebuilt) outside a Space.
    if outside_space and not rag.load_vector_store():
        rag.rebuild_vector_store_from_pdfs()
    yield
# Admin application; `lifespan` runs the RAG index preparation at startup.
admin_app = FastAPI(
    title="RAG Admin Panel",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS: every origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True appears
# to let any site issue credentialed requests to this admin API — confirm
# that this is intended before exposing the panel publicly.
admin_app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Static assets and Jinja2 templates live next to this module.
BASE_DIR = Path(__file__).resolve().parent
admin_app.mount(
    "/static",
    StaticFiles(directory=BASE_DIR / "static"),
    name="static",
)
templates = Jinja2Templates(directory=BASE_DIR / "templates")
@admin_app.get("/", response_class=HTMLResponse)
async def admin_home(request: Request):
    """Serve the ``admin.html`` template, the page used for PDF uploads."""
    context = {"request": request}
    return templates.TemplateResponse("admin.html", context)
@admin_app.post("/api/upload")
async def upload_pdf(file: UploadFile = File(...)):
    """Store an uploaded PDF under ``rag_data`` and index it for RAG.

    Args:
        file: The multipart upload. Its client-supplied name must end in
            ``.pdf`` (case-insensitive); only the final path component of
            that name is used when saving.

    Returns:
        A JSONResponse with ``success``, ``message``, ``chunks_created`` and
        ``total_documents``.

    Raises:
        HTTPException: 400 when the name is missing or not a PDF, or when
            no chunks could be extracted; 500 when the vector store could
            not be created or any unexpected error occurs.
    """
    # The filename is client-controlled. Keep only its last path component so
    # a name such as "../../app/x.pdf" cannot be written outside rag_data;
    # `or ""` covers uploads that arrive without a filename.
    safe_name = Path(file.filename or "").name
    if not safe_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are allowed")

    try:
        # Initialize embeddings if not already done
        rag.initialize_embeddings()

        # Save the uploaded file
        rag_data_dir = Path(__file__).resolve().parent.parent / "rag_data"
        rag_data_dir.mkdir(parents=True, exist_ok=True)
        pdf_path = rag_data_dir / safe_name
        pdf_path.write_bytes(await file.read())

        # Process the PDF
        chunks = rag.load_and_process_pdf(str(pdf_path))
        if not chunks:
            raise HTTPException(status_code=400, detail="Could not extract text from PDF")

        # Create/update the vector store
        if not rag.create_vector_store(chunks):
            raise HTTPException(status_code=500, detail="Failed to create vector store")

        # Return value is unused; presumably this refreshes
        # rag.uploaded_documents before it is counted below — confirm.
        rag.get_rag_status()
        return JSONResponse({
            "success": True,
            "message": f"PDF '{safe_name}' uploaded and processed successfully",
            "chunks_created": len(chunks),
            "total_documents": len(rag.uploaded_documents),
        })
    except HTTPException:
        # Deliberate HTTP errors raised above must keep their own status code
        # and detail instead of being re-wrapped as a generic 500.
        raise
    except Exception as e:
        print(f"RAG Upload Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to process PDF: {str(e)}") from e
@admin_app.get("/api/status")
async def get_status():
    """Return whatever ``rag.get_rag_status()`` reports, as a JSON response."""
    status = rag.get_rag_status()
    return JSONResponse(status)
@admin_app.post("/api/clear")
async def clear_data():
    """Call ``rag.clear_rag_data()`` and acknowledge with a JSON payload."""
    rag.clear_rag_data()
    payload = {"success": True, "message": "RAG data cleared"}
    return JSONResponse(payload)
@admin_app.delete("/api/document/{filename}")
async def delete_document(filename: str):
    """Delete one stored document and refresh the RAG index.

    The file is removed from ``rag_data`` if present. A file that is already
    missing is not an error: the index is still refreshed and success is
    reported. Afterwards the vector store is rebuilt when any ``*.pdf``
    remains, otherwise all RAG data is cleared.

    Args:
        filename: Bare name of the document inside ``rag_data``.

    Returns:
        A JSONResponse with ``success`` and ``message``.

    Raises:
        HTTPException: 400 when ``filename`` is not a plain file name;
            500 when removal or the index refresh fails.
    """
    # The name comes from the URL. Accept only a bare file name so the
    # resolved path cannot leave rag_data (e.g. "..", or names containing
    # path separators or a drive prefix).
    if filename in ("", ".", "..") or Path(filename).name != filename:
        raise HTTPException(status_code=400, detail="Invalid document name")

    rag_data_dir = Path(__file__).resolve().parent.parent / "rag_data"
    pdf_path = rag_data_dir / filename
    try:
        try:
            # EAFP instead of exists()+remove(): no window between the check
            # and the delete.
            pdf_path.unlink()
        except FileNotFoundError:
            pass  # already gone; still refresh the index below

        if any(rag_data_dir.glob("*.pdf")):
            rag.rebuild_vector_store_from_pdfs()
        else:
            rag.clear_rag_data()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to delete: {str(e)}") from e

    return JSONResponse({"success": True, "message": f"Document '{filename}' deleted"})
@admin_app.post("/api/rebuild")
async def rebuild_data():
    """Rebuild the vector store from the PDFs in rag_data and report the result.

    The JSON ``success`` flag mirrors the truthiness of
    ``rag.rebuild_vector_store_from_pdfs()``.
    """
    rebuilt = rag.rebuild_vector_store_from_pdfs()
    if not rebuilt:
        return JSONResponse({"success": False, "message": "No valid PDFs found to rebuild RAG"})
    return JSONResponse({"success": True, "message": "RAG rebuilt successfully from all PDFs"})
if __name__ == "__main__":
    # Script entry point: serve the admin panel with uvicorn on port 9000,
    # bound to 0.0.0.0.
    import uvicorn

    uvicorn.run(
        admin_app,
        host="0.0.0.0",
        port=9000,
    )