Dinesh310 committed on
Commit
8d56dc3
·
verified ·
1 Parent(s): c0db5fc

Upload 29 files

Browse files
src/config/__init__.py ADDED
File without changes
src/config/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (141 Bytes). View file
 
src/config/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (127 Bytes). View file
 
src/config/__pycache__/config.cpython-313.pyc ADDED
Binary file (1.31 kB). View file
 
src/config/__pycache__/config.cpython-38.pyc ADDED
Binary file (1 kB). View file
 
src/config/config.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration module for Agentic RAG system"""
2
+
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from langchain.chat_models import init_chat_model
6
+
7
+ # Load environment variables
8
+ load_dotenv()
9
+
10
class Config:
    """Central configuration for the Agentic RAG system.

    Holds API keys, model selection, chunking parameters, and default
    source URLs as class-level constants.
    """

    # API Keys (read from the environment; load_dotenv() at module
    # level pulls in a .env file first)
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

    # Model Configuration (provider-prefixed name for init_chat_model)
    LLM_MODEL = "openai:gpt-4o"

    # Document Processing
    CHUNK_SIZE = 500
    CHUNK_OVERLAP = 50

    # Default URLs indexed when the caller supplies none
    DEFAULT_URLS = [
        "https://lilianweng.github.io/posts/2023-06-23-agent/",
        "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/"
    ]

    @classmethod
    def get_llm(cls):
        """Initialize and return the LLM model.

        Returns:
            A chat model instance created via ``init_chat_model``.

        Raises:
            ValueError: If OPENAI_API_KEY is not set. Previously a
                missing key crashed with a cryptic TypeError when the
                ``None`` value was assigned into ``os.environ``.
        """
        if not cls.OPENAI_API_KEY:
            raise ValueError(
                "OPENAI_API_KEY is not set. Add it to your environment or .env file."
            )
        os.environ["OPENAI_API_KEY"] = cls.OPENAI_API_KEY
        return init_chat_model(cls.LLM_MODEL)
src/document_ingestion/__init__.py ADDED
File without changes
src/document_ingestion/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (153 Bytes). View file
 
src/document_ingestion/__pycache__/document_processor.cpython-313.pyc ADDED
Binary file (4.9 kB). View file
 
src/document_ingestion/document_processor.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Document processing module for loading and splitting documents"""
2
+
3
+ from typing import List
4
+ from langchain_community.document_loaders import WebBaseLoader
5
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
6
+ from langchain.schema import Document
7
+
8
+ from typing import List, Union
9
+ from pathlib import Path
10
+ from langchain_community.document_loaders import (
11
+ WebBaseLoader,
12
+ PyPDFLoader,
13
+ TextLoader,
14
+ PyPDFDirectoryLoader
15
+ )
16
+
17
class DocumentProcessor:
    """Loads documents from URLs, PDFs, and TXT files and splits them into chunks."""

    def __init__(self, chunk_size: int = 500, chunk_overlap: int = 50):
        """
        Initialize document processor.

        Args:
            chunk_size: Size of text chunks
            chunk_overlap: Overlap between chunks
        """
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap
        )

    def load_from_url(self, url: str) -> List["Document"]:
        """Load document(s) from a URL."""
        return WebBaseLoader(url).load()

    def load_from_pdf_dir(self, directory: Union[str, Path]) -> List["Document"]:
        """Load documents from all PDFs inside a directory."""
        return PyPDFDirectoryLoader(str(directory)).load()

    def load_from_txt(self, file_path: Union[str, Path]) -> List["Document"]:
        """Load document(s) from a TXT file."""
        return TextLoader(str(file_path), encoding="utf-8").load()

    def load_from_pdf(self, file_path: Union[str, Path]) -> List["Document"]:
        """Load document(s) from a single PDF file.

        Bug fix: this previously ignored ``file_path`` entirely and
        loaded the hard-coded "data" directory via PyPDFDirectoryLoader.
        """
        return PyPDFLoader(str(file_path)).load()

    def load_documents(self, sources: List[str]) -> List["Document"]:
        """
        Load documents from URLs, PDF files/directories, or TXT files.

        Args:
            sources: List of URLs, PDF folder paths, .pdf paths, or .txt paths

        Returns:
            List of loaded documents

        Raises:
            ValueError: If a source is not a URL, directory, .txt, or .pdf.
        """
        docs: List["Document"] = []
        for src in sources:
            # URLs are handled here and skip the filesystem dispatch below.
            # Bug fix: previously there was no `continue`, so URL sources
            # also fell through into the path branch.
            if src.startswith(("http://", "https://")):
                docs.extend(self.load_from_url(src))
                continue

            # Bug fix: previously every non-URL source was resolved to the
            # hard-coded Path("data") instead of the source itself.
            path = Path(src)
            if path.is_dir():  # directory of PDFs
                docs.extend(self.load_from_pdf_dir(path))
            elif path.suffix.lower() == ".txt":
                docs.extend(self.load_from_txt(path))
            elif path.suffix.lower() == ".pdf":
                docs.extend(self.load_from_pdf(path))
            else:
                raise ValueError(
                    f"Unsupported source type: {src}. "
                    "Use URL, .txt file, or PDF directory."
                )
        return docs

    def split_documents(self, documents: List["Document"]) -> List["Document"]:
        """
        Split documents into chunks.

        Args:
            documents: List of documents to split

        Returns:
            List of split documents
        """
        return self.splitter.split_documents(documents)

    def process_urls(self, urls: List[str]) -> List["Document"]:
        """
        Complete pipeline to load and split documents.

        Args:
            urls: List of URLs (or paths) to process

        Returns:
            List of processed document chunks
        """
        docs = self.load_documents(urls)
        return self.split_documents(docs)
src/graph_builder/__init__.py ADDED
File without changes
src/graph_builder/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (148 Bytes). View file
 
src/graph_builder/__pycache__/graph_builder.cpython-313.pyc ADDED
Binary file (2.31 kB). View file
 
src/graph_builder/graph_builder.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Graph builder for LangGraph workflow"""
2
+
3
+ from langgraph.graph import StateGraph, END
4
+ from src.state.rag_state import RAGState
5
+ from src.node.reactnode import RAGNodes
6
+
7
class GraphBuilder:
    """Assembles and runs the two-node retrieve -> respond LangGraph workflow."""

    def __init__(self, retriever, llm):
        """
        Create a builder around the given retriever and LLM.

        Args:
            retriever: Document retriever instance
            llm: Language model instance
        """
        self.nodes = RAGNodes(retriever, llm)
        self.graph = None

    def build(self):
        """
        Wire up and compile the RAG workflow graph.

        Returns:
            Compiled graph instance
        """
        workflow = StateGraph(RAGState)

        # Two nodes: retrieval feeds generation, which ends the run.
        workflow.add_node("retriever", self.nodes.retrieve_docs)
        workflow.add_node("responder", self.nodes.generate_answer)

        workflow.set_entry_point("retriever")
        workflow.add_edge("retriever", "responder")
        workflow.add_edge("responder", END)

        self.graph = workflow.compile()
        return self.graph

    def run(self, question: str) -> dict:
        """
        Execute the workflow for a single question.

        Args:
            question: User question

        Returns:
            Final state with answer
        """
        # Compile lazily on first use; build() stores and returns the graph.
        graph = self.graph or self.build()
        return graph.invoke(RAGState(question=question))
src/node/__init__.py ADDED
File without changes
src/node/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (139 Bytes). View file
 
src/node/__pycache__/modesex.cpython-313.pyc ADDED
Binary file (4.57 kB). View file
 
src/node/__pycache__/nodes.cpython-313.pyc ADDED
Binary file (2.28 kB). View file
 
src/node/__pycache__/reactnode.cpython-313.pyc ADDED
Binary file (4.57 kB). View file
 
src/node/nodes.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LangGraph nodes for RAG workflow"""
2
+
3
+ from src.state.rag_state import RAGState
4
+
5
class RAGNodes:
    """Node functions (retrieval and generation) for the RAG workflow."""

    def __init__(self, retriever, llm):
        """
        Initialize RAG nodes.

        Args:
            retriever: Document retriever instance
            llm: Language model instance
        """
        self.retriever = retriever
        self.llm = llm

    def retrieve_docs(self, state: RAGState) -> RAGState:
        """
        Fetch documents relevant to the current question.

        Args:
            state: Current RAG state

        Returns:
            New RAG state carrying the retrieved documents
        """
        retrieved = self.retriever.invoke(state.question)
        return RAGState(question=state.question, retrieved_docs=retrieved)

    def generate_answer(self, state: RAGState) -> RAGState:
        """
        Produce an answer grounded in the retrieved documents.

        Args:
            state: Current RAG state with retrieved documents

        Returns:
            New RAG state carrying the generated answer
        """
        # Stitch all retrieved passages into one context string.
        context = "\n\n".join(doc.page_content for doc in state.retrieved_docs)

        prompt = f"""Answer the question based on the context.

Context:
{context}

Question: {state.question}"""

        reply = self.llm.invoke(prompt)

        return RAGState(
            question=state.question,
            retrieved_docs=state.retrieved_docs,
            answer=reply.content,
        )
src/node/reactnode.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LangGraph nodes for RAG workflow + ReAct Agent inside generate_content"""
2
+
3
+ from typing import List, Optional
4
+ from src.state.rag_state import RAGState
5
+
6
+ from langchain_core.documents import Document
7
+ from langchain_core.tools import Tool
8
+ from langchain_core.messages import HumanMessage
9
+ from langgraph.prebuilt import create_react_agent
10
+
11
+ # Wikipedia tool
12
+ from langchain_community.utilities import WikipediaAPIWrapper
13
+ from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
14
+
15
+
16
class RAGNodes:
    """Node functions for the RAG workflow; answer generation runs a ReAct agent."""

    def __init__(self, retriever, llm):
        self.retriever = retriever
        self.llm = llm
        self._agent = None  # built lazily on first generate_answer call

    def retrieve_docs(self, state: RAGState) -> RAGState:
        """Classic retriever node"""
        retrieved = self.retriever.invoke(state.question)
        return RAGState(question=state.question, retrieved_docs=retrieved)

    def _build_tools(self) -> List[Tool]:
        """Build retriever + wikipedia tools"""

        def search_corpus(query: str) -> str:
            hits: List[Document] = self.retriever.invoke(query)
            if not hits:
                return "No documents found."
            # Number the top passages, labelling each with its best-known title.
            sections = []
            for idx, doc in enumerate(hits[:8], start=1):
                meta = doc.metadata if hasattr(doc, "metadata") else {}
                label = meta.get("title") or meta.get("source") or f"doc_{idx}"
                sections.append(f"[{idx}] {label}\n{doc.page_content}")
            return "\n\n".join(sections)

        corpus_tool = Tool(
            name="retriever",
            description="Fetch passages from indexed corpus.",
            func=search_corpus,
        )

        wiki_runner = WikipediaQueryRun(
            api_wrapper=WikipediaAPIWrapper(top_k_results=3, lang="en")
        )
        wiki_tool = Tool(
            name="wikipedia",
            description="Search Wikipedia for general knowledge.",
            func=wiki_runner.run,
        )

        return [corpus_tool, wiki_tool]

    def _build_agent(self):
        """ReAct agent with tools"""
        system_prompt = (
            "You are a helpful RAG agent. "
            "Prefer 'retriever' for user-provided docs; use 'wikipedia' for general knowledge. "
            "Return only the final useful answer."
        )
        self._agent = create_react_agent(
            self.llm, tools=self._build_tools(), prompt=system_prompt
        )

    def generate_answer(self, state: RAGState) -> RAGState:
        """
        Generate answer using ReAct agent with retriever + wikipedia.
        """
        if self._agent is None:
            self._build_agent()

        outcome = self._agent.invoke(
            {"messages": [HumanMessage(content=state.question)]}
        )

        # The agent's final message (if any) carries the answer text.
        final_text: Optional[str] = None
        history = outcome.get("messages", [])
        if history:
            final_text = getattr(history[-1], "content", None)

        return RAGState(
            question=state.question,
            retrieved_docs=state.retrieved_docs,
            answer=final_text or "Could not generate answer.",
        )
src/state/__init__.py ADDED
File without changes
src/state/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (140 Bytes). View file
 
src/state/__pycache__/rag_state.cpython-313.pyc ADDED
Binary file (752 Bytes). View file
 
src/state/rag_state.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """RAG state definition for LangGraph"""
2
+
3
+ from typing import List
4
+ from pydantic import BaseModel
5
+ from langchain.schema import Document
6
+
7
class RAGState(BaseModel):
    """State object for RAG workflow.

    Flows through the LangGraph pipeline: the retriever node fills
    ``retrieved_docs`` and the responder node fills ``answer``.
    """

    # The user's question driving the workflow.
    question: str
    # Documents fetched by the retriever node (pydantic copies field
    # defaults per instance, so the shared-mutable-default pitfall of
    # plain classes does not apply here).
    retrieved_docs: List[Document] = []
    # Final generated answer; empty until the responder node runs.
    answer: str = ""
src/vectorstore/__init__.py ADDED
File without changes
src/vectorstore/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (146 Bytes). View file
 
src/vectorstore/__pycache__/vectorstore.cpython-313.pyc ADDED
Binary file (2.43 kB). View file
 
src/vectorstore/vectorstore.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Vector store module for document embedding and retrieval"""
2
+
3
+ from typing import List
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain_openai import OpenAIEmbeddings
6
+ from langchain.schema import Document
7
+
8
class VectorStore:
    """Manages FAISS vector store creation and retrieval."""

    def __init__(self):
        """Initialize vector store with OpenAI embeddings."""
        self.embedding = OpenAIEmbeddings()
        self.vectorstore = None
        self.retriever = None

    def create_vectorstore(self, documents: List["Document"]):
        """
        Create vector store from documents.

        Args:
            documents: List of documents to embed
        """
        self.vectorstore = FAISS.from_documents(documents, self.embedding)
        self.retriever = self.vectorstore.as_retriever()

    def get_retriever(self):
        """
        Get the retriever instance.

        Returns:
            Retriever instance

        Raises:
            ValueError: If the store has not been created yet.
        """
        if self.retriever is None:
            raise ValueError("Vector store not initialized. Call create_vectorstore first.")
        return self.retriever

    def retrieve(self, query: str, k: int = 4) -> List["Document"]:
        """
        Retrieve the k most relevant documents for a query.

        Args:
            query: Search query
            k: Number of documents to retrieve

        Returns:
            List of relevant documents

        Raises:
            ValueError: If the store has not been created yet.
        """
        if self.vectorstore is None:
            raise ValueError("Vector store not initialized. Call create_vectorstore first.")
        # Bug fix: ``k`` was previously accepted but silently ignored
        # because the call went through the default retriever; query
        # FAISS directly so the requested result count is honored.
        return self.vectorstore.similarity_search(query, k=k)