Spaces:

aravsaxena884
/

trueRAG

Runtime error

App Files Files Community

aravsaxena884 committed on Aug 21, 2025

Commit

a5ec459

1 Parent(s): 54693e5

s

Browse files

Files changed (3) hide show

Dockerfile +30 -0
app.py +486 -0
req.txt +18 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,30 @@

+FROM python:3.10-slim
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    gcc \
+    g++ \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Hugging Face Docker Spaces run the container as UID 1000. Create that user
+# with a writable home so libraries that cache under $HOME (model downloads)
+# do not fail with permission errors at runtime.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Copy requirements and install Python dependencies
+COPY --chown=user req.txt .
+RUN pip install --no-cache-dir -r req.txt
+# Copy application code
+COPY --chown=user app.py .
+# /tmp already exists in the base image with mode 1777 (world-writable plus
+# sticky bit), so no mkdir/chmod is needed for the app's temporary files.
+# Expose port
+EXPOSE 7860
+# Set environment variables
+ENV PORT=7860
+ENV PYTHONUNBUFFERED=1
+# Run the application
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,486 @@

+import os
+import uuid
+import logging
+from typing import Annotated, Literal, Sequence, TypedDict, Optional, List
+import asyncio
+from contextlib import asynccontextmanager
+import requests
+from fastapi import FastAPI, HTTPException, BackgroundTasks
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, HttpUrl
+import uvicorn
+# LangChain imports
+from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
+from langchain_core.prompts import PromptTemplate
+from langchain_core.pydantic_v1 import Field
+from langchain_core.tools import tool
+from langchain_groq import ChatGroq
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
+# LangGraph imports
+from langgraph.graph import END, StateGraph, START
+from langgraph.graph.message import add_messages
+from langgraph.prebuilt import tools_condition, ToolNode
+# Docling imports
+from docling.document_converter import DocumentConverter
+from docling.datamodel.base_models import InputFormat
+# Qdrant imports
+from qdrant_client import QdrantClient
+from qdrant_client.http import models
+from qdrant_client.http.models import Distance, VectorParams, PointStruct
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Environment variables
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+QDRANT_URL = os.getenv("QDRANT_URL")
+QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
+if not GROQ_API_KEY:
+    raise ValueError("GROQ_API_KEY environment variable is required")
+# Global variables for clients and models
+qdrant_client = None
+embeddings_model = None
+llm = None
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Initialize global resources on startup"""
+    global qdrant_client, embeddings_model, llm
+    # Initialize Qdrant client
+    qdrant_client = QdrantClient(
+        url=QDRANT_URL,
+        api_key=QDRANT_API_KEY,
+        timeout=60
+    )
+    # Initialize embeddings model
+    embeddings_model = HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-MiniLM-L6-v2",
+        model_kwargs={'device': 'cpu'}
+    )
+    # Initialize LLM
+    llm = ChatGroq(
+        groq_api_key=GROQ_API_KEY,
+        model_name="mixtral-8x7b-32768",
+        temperature=0
+    )
+    logger.info("Application initialized successfully")
+    yield
+    # Cleanup
+    logger.info("Application shutting down")
+app = FastAPI(
+    title="Agentic RAG with PDF Processing",
+    description="Production-ready RAG system with agentic workflow for PDF Q&A",
+    version="1.0.0",
+    lifespan=lifespan
+)
+# CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Pydantic models
+class PDFUploadRequest(BaseModel):
+    # Request body for POST /upload-pdf.
+    # URL the PDF is downloaded from.
+    pdf_url: HttpUrl
+    # Target Qdrant collection; when omitted, upload_pdf generates a
+    # "pdf_<8 hex chars>" name.
+    collection_name: Optional[str] = None
+class QuestionRequest(BaseModel):
+    # Request body for POST /chat.
+    # The user's question; becomes the first message of the agent state.
+    question: str
+    # Qdrant collection to query; /chat responds 404 when the lookup fails.
+    collection_name: str
+class ChatResponse(BaseModel):
+    # Response body for POST /chat.
+    # Content of the last message produced by the workflow.
+    answer: str
+    # The chat endpoint fills this with the queried collection name.
+    sources: List[str] = []
+    # The chat endpoint stores "collection_name" and "message_count" here.
+    metadata: dict = {}
+# Agent State
+class AgentState(TypedDict):
+    # State passed between the workflow nodes.
+    # Conversation so far; the add_messages annotation is the reducer used
+    # to merge the message lists returned by nodes into this field.
+    messages: Annotated[Sequence[BaseMessage], add_messages]
+    # Name of the Qdrant collection the current request targets.
+    collection_name: str
+# Document processing functions
+async def download_pdf(url: str) -> bytes:
+    """Download PDF from URL"""
+    try:
+        response = requests.get(str(url), timeout=30)
+        response.raise_for_status()
+        return response.content
+    except Exception as e:
+        logger.error(f"Failed to download PDF: {e}")
+        raise HTTPException(status_code=400, detail=f"Failed to download PDF: {e}")
+async def extract_pdf_content(pdf_content: bytes) -> List[Document]:
+    """Extract content from PDF using Docling"""
+    try:
+        # Initialize document converter
+        converter = DocumentConverter()
+        # Save PDF content to temporary file
+        temp_file = f"/tmp/{uuid.uuid4()}.pdf"
+        with open(temp_file, "wb") as f:
+            f.write(pdf_content)
+        # Convert document
+        result = converter.convert(temp_file)
+        # Extract text and create documents
+        documents = []
+        full_text = result.document.export_to_markdown()
+        # Split text into chunks
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200,
+            separators=["\n\n", "\n", " ", ""]
+        )
+        chunks = text_splitter.split_text(full_text)
+        for i, chunk in enumerate(chunks):
+            doc = Document(
+                page_content=chunk,
+                metadata={
+                    "source": "pdf",
+                    "chunk_id": i,
+                    "total_chunks": len(chunks)
+                }
+            )
+            documents.append(doc)
+        # Clean up temporary file
+        os.remove(temp_file)
+        logger.info(f"Extracted {len(documents)} document chunks")
+        return documents
+    except Exception as e:
+        logger.error(f"Failed to extract PDF content: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to extract PDF content: {e}")
+async def store_in_qdrant(documents: List[Document], collection_name: str):
+    """Store documents in Qdrant vector database"""
+    try:
+        # Create collection if it doesn't exist
+        try:
+            qdrant_client.get_collection(collection_name)
+        except Exception:
+            qdrant_client.create_collection(
+                collection_name=collection_name,
+                vectors_config=VectorParams(size=384, distance=Distance.COSINE)
+            )
+        # Generate embeddings and store documents
+        points = []
+        for i, doc in enumerate(documents):
+            embedding = embeddings_model.embed_query(doc.page_content)
+            point = PointStruct(
+                id=i,
+                vector=embedding,
+                payload={
+                    "text": doc.page_content,
+                    "metadata": doc.metadata
+                }
+            )
+            points.append(point)
+        # Upload points in batches
+        batch_size = 100
+        for i in range(0, len(points), batch_size):
+            batch = points[i:i + batch_size]
+            qdrant_client.upsert(
+                collection_name=collection_name,
+                points=batch
+            )
+        logger.info(f"Stored {len(documents)} documents in Qdrant collection: {collection_name}")
+    except Exception as e:
+        logger.error(f"Failed to store documents in Qdrant: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to store documents: {e}")
+# RAG Tools
+@tool
+def retriever_tool(query: str, collection_name: str) -> str:
+    """Retrieve relevant documents from Qdrant based on the query."""
+    try:
+        # Generate query embedding
+        query_embedding = embeddings_model.embed_query(query)
+        # Search in Qdrant
+        search_result = qdrant_client.search(
+            collection_name=collection_name,
+            query_vector=query_embedding,
+            limit=5
+        )
+        # Format results
+        documents = []
+        for result in search_result:
+            documents.append(result.payload["text"])
+        return "\n\n".join(documents)
+    except Exception as e:
+        logger.error(f"Retrieval failed: {e}")
+        return "No relevant documents found."
+# Agent workflow functions
+def grade_documents(state) -> Literal["generate", "rewrite"]:
+    """Determines whether the retrieved documents are relevant to the question."""
+    logger.info("---CHECK RELEVANCE---")
+    messages = state["messages"]
+    last_message = messages[-1]
+    question = messages[0].content
+    docs = last_message.content
+    # Create a simple relevance check prompt
+    prompt = f"""
+    You are assessing the relevance of retrieved documents to a user question.
+    Question: {question}
+    Documents: {docs[:500]}...
+    Are these documents relevant to answer the question? Respond with only 'yes' or 'no'.
+    """
+    try:
+        response = llm.invoke([HumanMessage(content=prompt)])
+        decision = response.content.strip().lower()
+        if "yes" in decision:
+            logger.info("---DECISION: DOCS RELEVANT---")
+            return "generate"
+        else:
+            logger.info("---DECISION: DOCS NOT RELEVANT---")
+            return "rewrite"
+    except Exception:
+        # Default to generate if assessment fails
+        return "generate"
+def agent(state):
+    """Agent that decides whether to retrieve documents or end."""
+    logger.info("---CALL AGENT---")
+    messages = state["messages"]
+    collection_name = state["collection_name"]
+    # Bind the retriever tool to the model
+    tools = [retriever_tool]
+    model_with_tools = llm.bind_tools(tools)
+    # Add system message about using retrieval
+    system_prompt = HumanMessage(
+        content=f"""You are an AI assistant with access to a document retrieval tool.
+        Use the retriever_tool to find relevant information from the collection '{collection_name}'
+        to answer user questions. Always use the tool first before providing an answer."""
+    )
+    messages_with_system = [system_prompt] + messages
+    response = model_with_tools.invoke(messages_with_system)
+    return {"messages": [response]}
+def rewrite(state):
+    """Transform the query to produce a better question."""
+    logger.info("---TRANSFORM QUERY---")
+    messages = state["messages"]
+    question = messages[0].content
+    rewrite_prompt = f"""
+    Look at the input and try to reason about the underlying semantic intent/meaning.
+    Original question: {question}
+    Formulate an improved, more specific question that would help retrieve better documents:
+    """
+    try:
+        response = llm.invoke([HumanMessage(content=rewrite_prompt)])
+        return {"messages": [response]}
+    except Exception as e:
+        logger.error(f"Rewrite failed: {e}")
+        return {"messages": [HumanMessage(content=question)]}
+def generate(state):
+    """Generate final answer based on retrieved documents."""
+    logger.info("---GENERATE---")
+    messages = state["messages"]
+    question = messages[0].content
+    last_message = messages[-1]
+    docs = last_message.content
+    # RAG prompt
+    rag_prompt = f"""
+    Use the following pieces of context to answer the question at the end.
+    If you don't know the answer based on the context, just say that you don't know,
+    don't try to make up an answer.
+    Context:
+    {docs}
+    Question: {question}
+    Answer:
+    """
+    try:
+        response = llm.invoke([HumanMessage(content=rag_prompt)])
+        return {"messages": [response]}
+    except Exception as e:
+        logger.error(f"Generation failed: {e}")
+        return {"messages": [AIMessage(content="I apologize, but I encountered an error generating the response.")]}
+# Create workflow
+def create_workflow():
+    """Create the agent workflow graph."""
+    workflow = StateGraph(AgentState)
+    # Add nodes
+    workflow.add_node("agent", agent)
+    retrieve = ToolNode([retriever_tool])
+    workflow.add_node("retrieve", retrieve)
+    workflow.add_node("rewrite", rewrite)
+    workflow.add_node("generate", generate)
+    # Add edges
+    workflow.add_edge(START, "agent")
+    workflow.add_conditional_edges(
+        "agent",
+        tools_condition,
+        {
+            "tools": "retrieve",
+            END: END,
+        },
+    )
+    workflow.add_conditional_edges(
+        "retrieve",
+        grade_documents,
+        {
+            "generate": "generate",
+            "rewrite": "rewrite"
+        }
+    )
+    workflow.add_edge("generate", END)
+    workflow.add_edge("rewrite", "agent")
+    return workflow.compile()
+# API Endpoints
+@app.post("/upload-pdf", response_model=dict)
+async def upload_pdf(request: PDFUploadRequest, background_tasks: BackgroundTasks):
+    """Upload and process PDF from URL"""
+    try:
+        # Generate collection name if not provided
+        collection_name = request.collection_name or f"pdf_{uuid.uuid4().hex[:8]}"
+        # Download PDF
+        pdf_content = await download_pdf(request.pdf_url)
+        # Extract content
+        documents = await extract_pdf_content(pdf_content)
+        # Store in vector database
+        await store_in_qdrant(documents, collection_name)
+        return {
+            "status": "success",
+            "message": f"PDF processed successfully",
+            "collection_name": collection_name,
+            "document_count": len(documents)
+        }
+    except Exception as e:
+        logger.error(f"PDF upload failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/chat", response_model=ChatResponse)
+async def chat(request: QuestionRequest):
+    """Chat with the documents using agentic RAG"""
+    try:
+        # Check if collection exists
+        try:
+            qdrant_client.get_collection(request.collection_name)
+        except Exception:
+            raise HTTPException(
+                status_code=404,
+                detail=f"Collection '{request.collection_name}' not found. Please upload a PDF first."
+            )
+        # Create workflow
+        workflow = create_workflow()
+        # Initial state
+        initial_state = {
+            "messages": [HumanMessage(content=request.question)],
+            "collection_name": request.collection_name
+        }
+        # Run the workflow
+        result = workflow.invoke(initial_state)
+        # Extract final answer
+        final_message = result["messages"][-1]
+        answer = final_message.content if hasattr(final_message, 'content') else str(final_message)
+        return ChatResponse(
+            answer=answer,
+            sources=[request.collection_name],
+            metadata={
+                "collection_name": request.collection_name,
+                "message_count": len(result["messages"])
+            }
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Chat failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Chat failed: {e}")
+@app.get("/collections", response_model=List[str])
+async def list_collections():
+    """List all available collections"""
+    try:
+        collections = qdrant_client.get_collections()
+        return [collection.name for collection in collections.collections]
+    except Exception as e:
+        logger.error(f"Failed to list collections: {e}")
+        return []
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    return {"status": "healthy", "message": "Agentic RAG service is running"}
+if __name__ == "__main__":
+    uvicorn.run(
+        "app:app",
+        host="0.0.0.0",
+        port=int(os.getenv("PORT", 7860)),
+        reload=False
+    )

req.txt ADDED Viewed

	@@ -0,0 +1,18 @@

+fastapi
+uvicorn
+langchain
+langchain-core
+langchain-groq
+langchain-community
+langgraph
+docling
+qdrant-client
+sentence-transformers
+transformers
+torch
+requests
+pydantic
+python-multipart
+numpy
+pandas
+Pillow