mahmoudalrefaey committed
Commit c1138eb · verified · 1 Parent(s): 40db345

Upload 2 files

Files changed (2)
  1. modules/llm_manager.py +283 -0
  2. modules/rag_pipeline.py +273 -0
modules/llm_manager.py ADDED
@@ -0,0 +1,283 @@
"""
LLM Manager Module
Handles local language models using transformers and HuggingFace
"""

import logging
from typing import Dict, Any

import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    BitsAndBytesConfig,
)
from langchain_community.llms import HuggingFacePipeline


class LLMManager:
    """Manages local language models for text generation"""

    def __init__(self, model_name: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
        """
        Initialize the LLM manager.

        Args:
            model_name: Name of the HuggingFace model to use
        """
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self.pipeline = None
        self.llm = None

        # Configure logging
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        # Per-model generation defaults
        self.model_config = {
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0": {
                "max_length": 1024,  # reduced for speed
                "temperature": 0.7,
                "top_p": 0.95,
                "do_sample": True,
                "pad_token_id": 0,
                "eos_token_id": 2,
            },
            "microsoft/DialoGPT-medium": {
                "max_length": 512,  # reduced for speed
                "temperature": 0.7,
                "top_p": 0.9,
                "do_sample": True,
                "pad_token_id": 50256,
                "eos_token_id": 50256,
            },
            "microsoft/phi-2": {
                "max_length": 2048,
                "temperature": 0.7,
                "top_p": 0.95,
                "do_sample": True,
                "pad_token_id": 0,
                "eos_token_id": 50256,
            },
        }

        # Initialize the model
        self._initialize_model()

    def _initialize_model(self):
        """Load the tokenizer and model, then build the generation pipeline."""
        try:
            self.logger.info(f"Loading language model: {self.model_name}")

            # Check if CUDA is available
            device = "cuda" if torch.cuda.is_available() else "cpu"
            self.logger.info(f"Using device: {device}")

            # Load tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )

            # Set a padding token if the tokenizer has none
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            if device == "cuda":
                # Load with 4-bit quantization on GPU for memory efficiency
                bnb_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_use_double_quant=True,
                    bnb_4bit_quant_type="nf4",
                    bnb_4bit_compute_dtype=torch.bfloat16
                )

                self.model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    quantization_config=bnb_config,
                    device_map="auto",
                    trust_remote_code=True,
                    torch_dtype=torch.bfloat16
                )
            else:
                # On CPU, load unquantized in float32 with reduced peak memory
                self.model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    device_map="cpu",
                    trust_remote_code=True,
                    torch_dtype=torch.float32,
                    low_cpu_mem_usage=True
                )

            # Get the model configuration, falling back to TinyLlama defaults
            config = self.model_config.get(
                self.model_name,
                self.model_config["TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
            )

            # Create the generation pipeline
            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                max_length=config["max_length"],
                temperature=config["temperature"],
                top_p=config["top_p"],
                do_sample=config["do_sample"],
                pad_token_id=config["pad_token_id"],
                eos_token_id=config["eos_token_id"],
                return_full_text=False
            )

            # Create the LangChain LLM wrapper
            self.llm = HuggingFacePipeline(
                pipeline=self.pipeline,
                model_kwargs={"temperature": config["temperature"]}
            )

            self.logger.info("Language model loaded successfully")

        except Exception as e:
            self.logger.error(f"Error loading language model: {e}")
            raise

    def generate_response(self, prompt: str, max_tokens: int = 500, temperature: float = 0.7) -> str:
        """
        Generate a response using the language model.

        Args:
            prompt: Input prompt
            max_tokens: Maximum number of new tokens to generate
            temperature: Sampling temperature

        Returns:
            Generated response
        """
        try:
            if not self.llm:
                raise ValueError("Language model not initialized")

            self.logger.info(f"Generating response for prompt: {prompt[:50]}...")

            # Format the prompt for the active model
            formatted_prompt = self._format_prompt(prompt)

            # Generate the response. HuggingFacePipeline forwards per-call
            # generation settings through `pipeline_kwargs`; loose keyword
            # arguments are not passed on to the underlying pipeline.
            response = self.llm(
                formatted_prompt,
                pipeline_kwargs={
                    "max_new_tokens": max_tokens,
                    "temperature": temperature,
                    "do_sample": True,
                },
            )

            # Clean up the response
            cleaned_response = self._clean_response(response)

            self.logger.info(f"Generated response: {cleaned_response[:50]}...")
            return cleaned_response

        except Exception as e:
            self.logger.error(f"Error generating response: {e}")
            raise

    def _format_prompt(self, prompt: str) -> str:
        """
        Format the prompt according to the model's chat template.

        Args:
            prompt: Raw prompt

        Returns:
            Formatted prompt
        """
        if "TinyLlama" in self.model_name:
            # TinyLlama chat format
            return (
                "<|system|>You are a helpful AI assistant. Answer questions "
                f"based on the provided context.</s><|user|>{prompt}</s><|assistant|>"
            )
        elif "DialoGPT" in self.model_name:
            # DialoGPT format
            return f"User: {prompt}\nAssistant:"
        elif "phi" in self.model_name:
            # Phi format
            return f"Instruct: {prompt}\nOutput:"
        else:
            # Default format
            return prompt

    def _clean_response(self, response: str) -> str:
        """
        Clean up the generated response.

        Args:
            response: Raw response

        Returns:
            Cleaned response
        """
        # Remove the prompt from the response if present
        if "Instruct:" in response:
            response = response.split("Output:")[-1].strip()
        elif "User:" in response:
            response = response.split("Assistant:")[-1].strip()
        elif "<|assistant|>" in response:
            response = response.split("<|assistant|>")[-1].strip()

        # Remove any remaining special tokens
        response = response.replace("<|endoftext|>", "").replace("<|im_end|>", "").strip()

        return response

    def get_model_info(self) -> Dict[str, Any]:
        """
        Get information about the loaded model.

        Returns:
            Dictionary with model information
        """
        if not self.model:
            return {"status": "not_initialized"}

        try:
            # Count model parameters
            total_params = sum(p.numel() for p in self.model.parameters())
            trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)

            return {
                "status": "initialized",
                "model_name": self.model_name,
                "total_parameters": f"{total_params:,}",
                "trainable_parameters": f"{trainable_params:,}",
                "device": str(next(self.model.parameters()).device),
                "dtype": str(next(self.model.parameters()).dtype)
            }

        except Exception as e:
            self.logger.error(f"Error getting model info: {e}")
            return {"status": "error", "error": str(e)}

    def change_model(self, model_name: str):
        """
        Swap in a different language model.

        Args:
            model_name: New model name
        """
        try:
            self.logger.info(f"Changing model from {self.model_name} to {model_name}")

            # Update the model name
            self.model_name = model_name

            # Release references to the existing model
            self.tokenizer = None
            self.model = None
            self.pipeline = None
            self.llm = None

            # Reinitialize with the new model
            self._initialize_model()

            self.logger.info("Model changed successfully")

        except Exception as e:
            self.logger.error(f"Error changing model: {e}")
            raise
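
A minimal usage sketch for this module (not part of the commit; assumes the dependencies above are installed and the default TinyLlama checkpoint can be downloaded):

    from modules.llm_manager import LLMManager

    manager = LLMManager()  # loads TinyLlama/TinyLlama-1.1B-Chat-v1.0 by default
    print(manager.get_model_info())  # parameter counts, device, dtype
    answer = manager.generate_response("What is retrieval-augmented generation?", max_tokens=128)
    print(answer)
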
modules/rag_pipeline.py ADDED
@@ -0,0 +1,273 @@
"""
RAG Pipeline Module
Orchestrates the retrieval-augmented generation process
"""

import logging
from typing import List, Dict, Any, Tuple

from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS

from .llm_manager import LLMManager


class RAGPipeline:
    """Retrieval-Augmented Generation pipeline"""

    def __init__(self, knowledge_base: FAISS, llm_manager: LLMManager):
        """
        Initialize the RAG pipeline.

        Args:
            knowledge_base: FAISS vector store
            llm_manager: LLM manager instance
        """
        self.knowledge_base = knowledge_base
        self.llm_manager = llm_manager
        self.retrieval_chain = None

        # Retrieval settings (see update_retrieval_parameters)
        self.search_type = "similarity"
        self.search_kwargs = {"k": 2}  # reduced for speed

        # Configure logging
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        # Initialize retrieval chain
        self._initialize_retrieval_chain()

    def _initialize_retrieval_chain(self):
        """Initialize the retrieval QA chain"""
        try:
            self.logger.info("Initializing retrieval QA chain")

            # Create the custom prompt template
            prompt_template = """You are a helpful AI assistant that answers questions based on the provided context.

Context: {context}

Question: {question}

Please provide a comprehensive answer based on the context above. If the context doesn't contain enough information to answer the question, say so. Be accurate and helpful.

Answer:"""

            prompt = PromptTemplate(
                template=prompt_template,
                input_variables=["context", "question"]
            )

            # Create the retrieval QA chain using the current retrieval settings
            self.retrieval_chain = RetrievalQA.from_chain_type(
                llm=self.llm_manager.llm,
                chain_type="stuff",
                retriever=self.knowledge_base.as_retriever(
                    search_type=self.search_type,
                    search_kwargs=self.search_kwargs
                ),
                chain_type_kwargs={"prompt": prompt},
                return_source_documents=True
            )

            self.logger.info("Retrieval QA chain initialized successfully")

        except Exception as e:
            self.logger.error(f"Error initializing retrieval chain: {e}")
            raise

    def get_response(self, query: str, max_tokens: int = 500, temperature: float = 0.7) -> str:
        """
        Get a response using the RAG pipeline.

        Args:
            query: User query
            max_tokens: Maximum tokens for the response
            temperature: Sampling temperature

        Returns:
            Generated response
        """
        try:
            if not self.retrieval_chain:
                raise ValueError("Retrieval chain not initialized")

            self.logger.info(f"Processing query: {query[:50]}...")

            # Retrieve relevant documents directly (k kept small for speed);
            # this bypasses the RetrievalQA chain so the prompt can be
            # constructed explicitly below.
            relevant_docs = self.knowledge_base.similarity_search(query, k=2)

            if not relevant_docs:
                return "I couldn't find any relevant information in the provided documents to answer your question."

            # Create context from the relevant documents
            context = self._create_context(relevant_docs)

            # Generate the response using the LLM
            response = self.llm_manager.generate_response(
                prompt=self._create_prompt(query, context),
                max_tokens=max_tokens,
                temperature=temperature
            )

            self.logger.info(f"Generated response: {response[:50]}...")
            return response

        except Exception as e:
            self.logger.error(f"Error in RAG pipeline: {e}")
            return f"I encountered an error while processing your question: {str(e)}"

    def _create_context(self, documents: List[Document]) -> str:
        """
        Build a context string from the retrieved documents.

        Args:
            documents: List of relevant documents

        Returns:
            Context string
        """
        context_parts = []

        for i, doc in enumerate(documents, 1):
            # Add the document source if available
            source = doc.metadata.get("source", "Unknown")
            content = doc.page_content.strip()

            context_parts.append(f"Document {i} (Source: {source}):\n{content}\n")

        return "\n".join(context_parts)

    def _create_prompt(self, query: str, context: str) -> str:
        """
        Create the prompt for the LLM.

        Args:
            query: User query
            context: Retrieved context

        Returns:
            Formatted prompt
        """
        return f"""Based on the following context, please answer the user's question. If the context doesn't contain enough information to answer the question, say so.

Context:
{context}

Question: {query}

Answer:"""

    def get_similar_documents(self, query: str, k: int = 4) -> List[Document]:
        """
        Get documents similar to a query.

        Args:
            query: Search query
            k: Number of documents to retrieve

        Returns:
            List of similar documents
        """
        try:
            return self.knowledge_base.similarity_search(query, k=k)
        except Exception as e:
            self.logger.error(f"Error retrieving similar documents: {e}")
            return []

    def get_similar_documents_with_scores(self, query: str, k: int = 4) -> List[Tuple[Document, float]]:
        """
        Get similar documents along with their similarity scores.

        Args:
            query: Search query
            k: Number of documents to retrieve

        Returns:
            List of (document, score) tuples
        """
        try:
            return self.knowledge_base.similarity_search_with_score(query, k=k)
        except Exception as e:
            self.logger.error(f"Error retrieving similar documents with scores: {e}")
            return []

    def add_documents(self, documents: List[Document]):
        """
        Add new documents to the knowledge base.

        Args:
            documents: List of documents to add
        """
        try:
            if not documents:
                return

            self.logger.info(f"Adding {len(documents)} documents to knowledge base")

            # Add documents to the vector store
            self.knowledge_base.add_documents(documents)

            # Reinitialize the retrieval chain with the updated knowledge base
            self._initialize_retrieval_chain()

            self.logger.info("Documents added successfully")

        except Exception as e:
            self.logger.error(f"Error adding documents: {e}")
            raise

    def get_pipeline_info(self) -> Dict[str, Any]:
        """
        Get information about the RAG pipeline.

        Returns:
            Dictionary with pipeline information
        """
        try:
            # Get knowledge base info
            kb_info = {}
            if self.knowledge_base:
                index = self.knowledge_base.index
                kb_info = {
                    "documents": index.ntotal if hasattr(index, 'ntotal') else "unknown",
                    "index_type": type(index).__name__
                }

            # Get LLM info
            llm_info = self.llm_manager.get_model_info()

            return {
                "status": "initialized" if self.retrieval_chain else "not_initialized",
                "knowledge_base": kb_info,
                "language_model": llm_info,
                "retrieval_chain": "initialized" if self.retrieval_chain else "not_initialized"
            }

        except Exception as e:
            self.logger.error(f"Error getting pipeline info: {e}")
            return {"status": "error", "error": str(e)}

    def update_retrieval_parameters(self, k: int = 4, search_type: str = "similarity"):
        """
        Update the retrieval parameters and rebuild the QA chain.

        Args:
            k: Number of documents to retrieve
            search_type: Type of search (similarity, mmr, etc.)
        """
        try:
            self.logger.info(f"Updating retrieval parameters: k={k}, search_type={search_type}")

            # Store the new settings; as_retriever() returns a new retriever
            # rather than mutating the vector store, so the chain must be
            # rebuilt with these values.
            self.search_type = search_type
            self.search_kwargs = {"k": k}

            # Reinitialize the chain
            self._initialize_retrieval_chain()

            self.logger.info("Retrieval parameters updated successfully")

        except Exception as e:
            self.logger.error(f"Error updating retrieval parameters: {e}")
            raise
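
A minimal end-to-end sketch showing how the two new modules compose (not part of the commit; the embedding model name and sample document are illustrative assumptions):

    from langchain.schema import Document
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS

    from modules.llm_manager import LLMManager
    from modules.rag_pipeline import RAGPipeline

    # Build a small FAISS knowledge base from raw documents
    docs = [Document(page_content="FAISS is a library for efficient similarity search.",
                     metadata={"source": "notes.txt"})]
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    knowledge_base = FAISS.from_documents(docs, embeddings)

    # Wire the pipeline together and ask a question
    rag = RAGPipeline(knowledge_base, LLMManager())
    print(rag.get_response("What is FAISS used for?", max_tokens=128))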