Spaces:
No application file
No application file
Upload folder src
Browse files
- src/SpeechToText/__init__.py +0 -0
- src/SpeechToText/hamsa.py +142 -0
- src/SpeechToText/sr.py +84 -0
- src/TextToSpeech/__init__.py +0 -0
- src/TextToSpeech/gtts_testing.mp3 +0 -0
- src/TextToSpeech/gtts_tts.py +90 -0
- src/TextToSpeech/hamsa.py +0 -0
- src/__init__.py +0 -0
- src/agenticRAG/__init__.py +0 -0
- src/agenticRAG/components/__init__.py +0 -0
- src/agenticRAG/components/document_parsing.py +214 -0
- src/agenticRAG/components/embeddings.py +62 -0
- src/agenticRAG/components/llm_factory.py +66 -0
- src/agenticRAG/components/search_tools.py +71 -0
- src/agenticRAG/components/vectorstore.py +297 -0
- src/agenticRAG/gpt.py +340 -0
- src/agenticRAG/graph/__init__.py +0 -0
- src/agenticRAG/graph/builder.py +50 -0
- src/agenticRAG/graph/router.py +11 -0
- src/agenticRAG/main.py +105 -0
- src/agenticRAG/models/__init__.py +0 -0
- src/agenticRAG/models/schemas.py +25 -0
- src/agenticRAG/models/state.py +17 -0
- src/agenticRAG/nodes/__init__.py +0 -0
- src/agenticRAG/nodes/direct_llm_node.py +33 -0
- src/agenticRAG/nodes/query_router.py +41 -0
- src/agenticRAG/nodes/query_upgrader.py +40 -0
- src/agenticRAG/nodes/rag_node.py +48 -0
- src/agenticRAG/nodes/web_search_node.py +44 -0
- src/agenticRAG/prompt/__init__.py +0 -0
- src/agenticRAG/prompt/prompts.py +159 -0
- src/config/__init__.py +0 -0
- src/config/settings.py +49 -0
src/SpeechToText/__init__.py
ADDED
|
File without changes
|
src/SpeechToText/hamsa.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import base64
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
import json
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
import os
|
| 7 |
+
load_dotenv()
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def transcribe_audio_hamsa(audio, language, history):
    """
    Transcribe audio using the Hamsa realtime STT API.

    Args:
        audio: Audio file path (str) or raw audio bytes; None means "nothing
            recorded yet" and is returned unchanged.
        language: Language label selected in the UI dropdown.
        history: Previous transcription history (newline-separated string).

    Returns:
        tuple: (updated_history, transcribed_text) on success, or
        (updated_history, error_message) when the request/parsing fails.

    Raises:
        ValueError: If the HAMS_API_KEY environment variable is not set.
    """
    api_key = os.getenv("HAMS_API_KEY")
    if not api_key:
        raise ValueError("HAMS_API_KEY not set in environment variables")

    if audio is None:
        return history, ""

    # UI labels -> Hamsa API language codes. All Arabic dialect labels map to
    # plain "ar" because the API does not distinguish regional variants.
    language_codes = {
        "English": "en",
        "Arabic": "ar",
        "Arabic (Egypt)": "ar",
        "Arabic (UAE)": "ar",
        "Arabic (Lebanon)": "ar",
        "Arabic (Saudi Arabia)": "ar",
        "Arabic (Kuwait)": "ar",
        "Arabic (Qatar)": "ar",
        "Arabic (Jordan)": "ar",
        "Auto-detect": "auto"  # NOTE(review): confirm Hamsa supports auto-detection
    }

    try:
        # Accept either a file path or already-loaded audio bytes.
        if isinstance(audio, str):
            with open(audio, 'rb') as audio_file:
                audio_bytes = audio_file.read()
        else:
            audio_bytes = audio

        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
        selected_language = language_codes.get(language, "ar")

        url = "https://api.tryhamsa.com/v1/realtime/stt"
        payload = {
            "audioList": [],  # Empty for single audio file
            "audioBase64": audio_base64,
            "language": selected_language,
            "isEosEnabled": False,
            "eosThreshold": 0.3
        }
        headers = {
            "Authorization": api_key,
            "Content-Type": "application/json"
        }

        # timeout keeps the UI from hanging forever on a dead connection.
        response = requests.post(url, json=payload, headers=headers, timeout=60)
        response.raise_for_status()

        result = response.json()
        text = result.get("text", "")

        # Mark auto-detected output so the user can tell it apart.
        if language == "Auto-detect" and text:
            text = f"[Auto-detected] {text}"

        return _append_to_history(history, language, text), text

    except requests.exceptions.RequestException as e:
        error_msg = f"API request failed: {e}"
    except json.JSONDecodeError as e:
        error_msg = f"Failed to parse API response: {e}"
    except Exception as e:
        error_msg = f"Unexpected error: {e}"

    # Shared failure path: record the error in the history and surface it.
    return _append_to_history(history, language, f"ERROR: {error_msg}"), error_msg


def _append_to_history(history, language, text):
    """Append a timestamped '[ts] [language] text' entry to the history string."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    new_entry = f"[{timestamp}] [{language}] {text}"
    return history + "\n" + new_entry if history else new_entry
|
| 133 |
+
|
| 134 |
+
def clear_history():
    """Reset both the stored transcription history and the current text."""
    empty = ""
    return empty, empty
|
| 137 |
+
|
| 138 |
+
# Example usage:
|
| 139 |
+
# api_key = "your-hamsa-api-key-here"
|
| 140 |
+
# history, text = transcribe_audio("path/to/audio.wav", "Arabic", "", api_key)
|
| 141 |
+
# print(f"Transcribed text: {text}")
|
| 142 |
+
# print(f"History: {history}")
|
src/SpeechToText/sr.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import speech_recognition as sr
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
|
| 4 |
+
def transcribe_audio(audio, language, history):
    """
    Transcribe an audio file with Google Speech Recognition.

    Args:
        audio: Path to an audio file, or None if nothing was recorded.
        language: Language label from the UI dropdown (mapped below).
        history: Previous transcription history (newline-separated string).

    Returns:
        tuple: (updated_history, transcribed_text) on success, or
        (updated_history, error_message) on recognition/network failure.
    """
    # Nothing to transcribe; bail out before building a recognizer.
    if audio is None:
        return history, ""

    recognizer = sr.Recognizer()

    # UI labels -> Google Speech Recognition locale codes.
    language_codes = {
        "English": "en-US",
        "Arabic": "ar-SA",  # Saudi Arabic
        "Arabic (Egypt)": "ar-EG",
        "Arabic (UAE)": "ar-AE",
        "Arabic (Lebanon)": "ar-LB",
        "Arabic (Saudi Arabia)": "ar-SA",
        "Arabic (Kuwait)": "ar-KW",
        "Arabic (Qatar)": "ar-QA",
        "Arabic (Jordan)": "ar-JO",
        "Auto-detect": None  # Let Google auto-detect
    }

    try:
        with sr.AudioFile(audio) as source:
            # Adjust for ambient noise before capturing the full clip.
            recognizer.adjust_for_ambient_noise(source)
            audio_data = recognizer.record(source)

        selected_language = language_codes.get(language, "en-US")

        if selected_language:
            text = recognizer.recognize_google(audio_data, language=selected_language)
        else:
            # Auto-detect: try Arabic first, fall back to English.
            try:
                text = recognizer.recognize_google(audio_data, language="ar-SA")
                detected_lang = "Arabic"
            except sr.UnknownValueError:
                # Narrowed from a bare `except:` — a RequestError should
                # propagate to the outer handler, not trigger the fallback.
                text = recognizer.recognize_google(audio_data, language="en-US")
                detected_lang = "English"
            text = f"[{detected_lang}] {text}"

        return _append_entry(history, language, text), text

    except sr.UnknownValueError:
        error_msg = "Could not understand audio"
    except sr.RequestError as e:
        error_msg = f"Could not request results; {e}"

    # Shared failure path: record the error in the history and surface it.
    return _append_entry(history, language, f"ERROR: {error_msg}"), error_msg


def _append_entry(history, language, text):
    """Append a timestamped '[ts] [language] text' entry to the history string."""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    new_entry = f"[{timestamp}] [{language}] {text}"
    return history + "\n" + new_entry if history else new_entry
|
| 82 |
+
|
| 83 |
+
def clear_history():
    """Clear the transcription history and the current transcription text."""
    empty = ""
    return empty, empty
|
src/TextToSpeech/__init__.py
ADDED
|
File without changes
|
src/TextToSpeech/gtts_testing.mp3
ADDED
|
Binary file (49.5 kB). View file
|
|
|
src/TextToSpeech/gtts_tts.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from gtts import gTTS
|
| 3 |
+
|
| 4 |
+
def text_to_speech_with_gtts_old(input_text, output_filepath, language="en"):
    """Synthesize *input_text* with gTTS and write the MP3 to *output_filepath*."""
    speech = gTTS(text=input_text, lang=language, slow=False)
    speech.save(output_filepath)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Demo: synthesize an Arabic test sentence to gtts_testing.mp3 on import/run.
# input_text="Hi this is AI with Hassan for testing purpose!"
input_text= "مرحبًا، هذا ذكاء اصطناعي مع حسن لغرض الاختبار!"
text_to_speech_with_gtts_old(input_text=input_text, output_filepath="gtts_testing.mp3", language="ar")
|
| 17 |
+
|
| 18 |
+
#Step1b: Setup Text to Speech–TTS–model with ElevenLabs
|
| 19 |
+
import elevenlabs
|
| 20 |
+
from elevenlabs.client import ElevenLabs
|
| 21 |
+
|
| 22 |
+
ELEVENLABS_API_KEY=os.environ.get("ELEVEN_API_KEY")
|
| 23 |
+
|
| 24 |
+
def text_to_speech_with_elevenlabs_old(input_text, output_filepath):
    """Synthesize *input_text* with ElevenLabs ("Aria" voice) and save as MP3."""
    client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    generated_audio = client.generate(
        text=input_text,
        voice="Aria",
        model="eleven_turbo_v2",
        output_format="mp3_22050_32",
    )
    elevenlabs.save(generated_audio, output_filepath)
|
| 33 |
+
|
| 34 |
+
#text_to_speech_with_elevenlabs_old(input_text, output_filepath="elevenlabs_testing.mp3")
|
| 35 |
+
|
| 36 |
+
#Step2: Use Model for Text output to Voice
|
| 37 |
+
|
| 38 |
+
import subprocess
|
| 39 |
+
import platform
|
| 40 |
+
|
| 41 |
+
def text_to_speech_with_gtts(input_text, output_filepath, language="en"):
    """
    Convert text to speech with gTTS, save it as MP3, and play it back.

    Args:
        input_text: Text to synthesize.
        output_filepath: Destination MP3 path.
        language: gTTS language code (default "en"). Previously hard-coded;
            the default keeps existing callers unchanged.
    """
    audioobj = gTTS(
        text=input_text,
        lang=language,
        slow=False
    )
    audioobj.save(output_filepath)

    # Play the saved file with the platform's native audio player.
    os_name = platform.system()
    try:
        if os_name == "Darwin":  # macOS
            subprocess.run(['afplay', output_filepath])
        elif os_name == "Windows":  # Windows
            subprocess.run(['powershell', '-c', f'(New-Object Media.SoundPlayer "{output_filepath}").PlaySync();'])
        elif os_name == "Linux":  # Linux
            # NOTE(review): 'aplay' plays WAV only; an MP3 will likely fail
            # here. Consider 'mpg123' or 'ffplay' instead — TODO confirm.
            subprocess.run(['aplay', output_filepath])
        else:
            raise OSError("Unsupported operating system")
    except Exception as e:
        print(f"An error occurred while trying to play the audio: {e}")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# Demo: autoplay test (call left commented out to avoid side effects on import).
input_text="Hi this is Ai with Hassan, autoplay testing!"
#text_to_speech_with_gtts(input_text=input_text, output_filepath="gtts_testing_autoplay.mp3")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def text_to_speech_with_elevenlabs(input_text, output_filepath):
    """Synthesize speech with ElevenLabs, save it to *output_filepath*, then play it."""
    client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    generated_audio = client.generate(
        text=input_text,
        voice="Aria",
        model="eleven_turbo_v2",
        output_format="mp3_22050_32",
    )
    elevenlabs.save(generated_audio, output_filepath)

    # Play the saved file with the platform's native audio player.
    current_os = platform.system()
    try:
        if current_os == "Darwin":  # macOS
            subprocess.run(['afplay', output_filepath])
        elif current_os == "Windows":  # Windows
            subprocess.run(['powershell', '-c', f'(New-Object Media.SoundPlayer "{output_filepath}").PlaySync();'])
        elif current_os == "Linux":  # Linux
            subprocess.run(['aplay', output_filepath])  # Alternative: use 'mpg123' or 'ffplay'
        else:
            raise OSError("Unsupported operating system")
    except Exception as e:
        print(f"An error occurred while trying to play the audio: {e}")
|
| 89 |
+
|
| 90 |
+
#text_to_speech_with_elevenlabs(input_text, output_filepath="elevenlabs_testing_autoplay.mp3")
|
src/TextToSpeech/hamsa.py
ADDED
|
File without changes
|
src/__init__.py
ADDED
|
File without changes
|
src/agenticRAG/__init__.py
ADDED
|
File without changes
|
src/agenticRAG/components/__init__.py
ADDED
|
File without changes
|
src/agenticRAG/components/document_parsing.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import List, Union
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
# LangChain imports
|
| 6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 7 |
+
from langchain_community.document_loaders import (
|
| 8 |
+
PyPDFLoader,
|
| 9 |
+
Docx2txtLoader,
|
| 10 |
+
TextLoader,
|
| 11 |
+
UnstructuredMarkdownLoader
|
| 12 |
+
)
|
| 13 |
+
from langchain.schema import Document
|
| 14 |
+
|
| 15 |
+
class DocumentChunker:
    """
    Read various document types (PDF, DOCX, TXT, MD) and split them into
    text chunks using LangChain's RecursiveCharacterTextSplitter.
    """

    # Extension -> (loader class, human-readable label, loader kwargs).
    # Single source of truth for both the read_* methods and load_document,
    # replacing four copy-pasted try/except loaders and an if/elif chain.
    _LOADERS = {
        '.pdf': (PyPDFLoader, 'PDF', {}),
        '.docx': (Docx2txtLoader, 'DOCX', {}),
        '.txt': (TextLoader, 'TXT', {'encoding': 'utf-8'}),
        '.md': (UnstructuredMarkdownLoader, 'MD', {}),
    }

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        """
        Initialize the DocumentChunker.

        Args:
            chunk_size (int): Size of each chunk in characters
            chunk_overlap (int): Number of characters to overlap between chunks
        """
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
            separators=["\n\n", "\n", " ", ""]
        )

    def _load_with(self, loader_cls, label: str, file_path: str, **loader_kwargs) -> List[Document]:
        """Load *file_path* with *loader_cls*; print and return [] on any error."""
        try:
            loader = loader_cls(file_path, **loader_kwargs)
            return loader.load()
        except Exception as e:
            print(f"Error reading {label} file {file_path}: {e}")
            return []

    def read_pdf(self, file_path: str) -> List[Document]:
        """Read PDF file and return documents"""
        return self._load_with(PyPDFLoader, 'PDF', file_path)

    def read_docx(self, file_path: str) -> List[Document]:
        """Read DOCX file and return documents"""
        return self._load_with(Docx2txtLoader, 'DOCX', file_path)

    def read_txt(self, file_path: str) -> List[Document]:
        """Read TXT file and return documents"""
        return self._load_with(TextLoader, 'TXT', file_path, encoding='utf-8')

    def read_md(self, file_path: str) -> List[Document]:
        """Read Markdown file and return documents"""
        return self._load_with(UnstructuredMarkdownLoader, 'MD', file_path)

    def load_document(self, file_path: str) -> List[Document]:
        """
        Load document based on file extension.

        Args:
            file_path (str): Path to the document file

        Returns:
            List[Document]: List of loaded documents ([] if unsupported or on error)
        """
        file_extension = Path(file_path).suffix.lower()
        entry = self._LOADERS.get(file_extension)
        if entry is None:
            print(f"Unsupported file type: {file_extension}")
            return []
        loader_cls, label, loader_kwargs = entry
        return self._load_with(loader_cls, label, file_path, **loader_kwargs)

    def chunk_documents(self, documents: List[Document]) -> List[str]:
        """
        Chunk documents and return list of strings.

        Args:
            documents (List[Document]): List of documents to chunk

        Returns:
            List[str]: List of chunked text strings
        """
        if not documents:
            return []

        chunks = self.text_splitter.split_documents(documents)
        # Only the raw text content is returned; chunk metadata is dropped.
        return [chunk.page_content for chunk in chunks]

    def process_file(self, file_path: str) -> List[str]:
        """
        Process a single file: load and chunk it.

        Args:
            file_path (str): Path to the file to process

        Returns:
            List[str]: List of chunked text strings ([] if missing/empty/unsupported)
        """
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            return []

        documents = self.load_document(file_path)
        if not documents:
            print(f"No content loaded from {file_path}")
            return []

        chunks = self.chunk_documents(documents)
        print(f"Successfully processed {file_path}: {len(chunks)} chunks created")
        return chunks

    def process_multiple_files(self, file_paths: List[str]) -> List[str]:
        """
        Process multiple files and return combined chunks.

        Args:
            file_paths (List[str]): List of file paths to process

        Returns:
            List[str]: Combined list of chunked text strings
        """
        all_chunks = []
        for file_path in file_paths:
            all_chunks.extend(self.process_file(file_path))
        return all_chunks
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# Example usage and utility functions
|
| 169 |
+
def main():
    """Example usage of the DocumentChunker class"""

    # Initialize chunker with custom parameters
    chunker = DocumentChunker(chunk_size=800, chunk_overlap=100)

    # Example: Process a single file
    file_path = "example.pdf"  # Replace with your file path
    chunks = chunker.process_file(file_path)

    if chunks:
        print(f"Total chunks: {len(chunks)}")
        print("\nFirst chunk preview:")
        # Conditional expression: truncate the preview to 200 chars only when
        # the first chunk is longer than that.
        print(chunks[0][:200] + "..." if len(chunks[0]) > 200 else chunks[0])

    # Example: Process multiple files
    # NOTE(review): these paths are placeholders; missing files are reported
    # and skipped by process_file, so this demo still runs.
    file_paths = [
        "document1.pdf",
        "document2.docx",
        "document3.txt",
        "document4.md"
    ]

    all_chunks = chunker.process_multiple_files(file_paths)
    print(f"\nTotal chunks from all files: {len(all_chunks)}")

    return all_chunks
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def create_chunker_with_custom_settings(chunk_size: int = 1000,
                                        chunk_overlap: int = 200) -> DocumentChunker:
    """Build and return a DocumentChunker configured with the given settings.

    Args:
        chunk_size (int): Size of each chunk.
        chunk_overlap (int): Overlap between chunks.

    Returns:
        DocumentChunker: Configured chunker instance.
    """
    options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return DocumentChunker(**options)
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# Script entry point: run the example workflow when executed directly.
if __name__ == "__main__":
    main()
|
src/agenticRAG/components/embeddings.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 2 |
+
from langchain_openai import OpenAIEmbeddings
|
| 3 |
+
from src.config.settings import settings
|
| 4 |
+
from typing import Union, Literal
|
| 5 |
+
|
| 6 |
+
class EmbeddingFactory:
    """Factory for creating embedding instances.

    Provides process-wide singletons per provider via get_embeddings(), and
    fresh, independently-configured instances via create_new_embeddings().
    """

    # Lazily-created shared instances, one per provider.
    # NOTE(review): lazy init is not thread-safe; fine for single-threaded
    # startup — confirm if used from multiple threads.
    _huggingface_instance = None
    _openai_instance = None

    @classmethod
    def get_embeddings(cls, provider: Literal["huggingface", "openai"] = "huggingface") -> Union[HuggingFaceEmbeddings, OpenAIEmbeddings]:
        """Get or create embeddings instance (singleton pattern).

        Raises:
            ValueError: If *provider* is not "huggingface" or "openai".
        """
        if provider == "huggingface":
            if cls._huggingface_instance is None:
                cls._huggingface_instance = HuggingFaceEmbeddings(
                    model_name=settings.EMBEDDING_MODEL
                )
            return cls._huggingface_instance
        elif provider == "openai":
            if cls._openai_instance is None:
                cls._openai_instance = OpenAIEmbeddings(
                    model=settings.OPENAI_EMBEDDING_MODEL,
                    openai_api_key=settings.OPENAI_API_KEY
                )
            return cls._openai_instance
        else:
            raise ValueError(f"Unsupported provider: {provider}")

    @classmethod
    def create_new_embeddings(cls, provider: Literal["huggingface", "openai"] = "huggingface", **kwargs) -> Union[HuggingFaceEmbeddings, OpenAIEmbeddings]:
        """Create a new embeddings instance with custom parameters.

        Known keys ("model_name" / "model", "api_key") override the settings
        defaults; all remaining kwargs are forwarded to the constructor.
        """
        if provider == "huggingface":
            return HuggingFaceEmbeddings(
                model_name=kwargs.get("model_name", settings.EMBEDDING_MODEL),
                **{k: v for k, v in kwargs.items() if k != "model_name"}
            )
        elif provider == "openai":
            return OpenAIEmbeddings(
                model=kwargs.get("model", settings.OPENAI_EMBEDDING_MODEL),
                openai_api_key=kwargs.get("api_key", settings.OPENAI_API_KEY),
                **{k: v for k, v in kwargs.items() if k not in ["model", "api_key"]}
            )
        else:
            raise ValueError(f"Unsupported provider: {provider}")

    @classmethod
    def get_huggingface_embeddings(cls) -> HuggingFaceEmbeddings:
        """Convenience method to get HuggingFace embeddings"""
        return cls.get_embeddings("huggingface")

    @classmethod
    def get_openai_embeddings(cls) -> OpenAIEmbeddings:
        """Convenience method to get OpenAI embeddings"""
        return cls.get_embeddings("openai")

    @classmethod
    def reset_instances(cls):
        """Reset singleton instances (useful for testing)"""
        cls._huggingface_instance = None
        cls._openai_instance = None
|
src/agenticRAG/components/llm_factory.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_groq import ChatGroq
|
| 2 |
+
from langchain_openai import ChatOpenAI
|
| 3 |
+
from src.config.settings import settings
|
| 4 |
+
from typing import Union, Literal
|
| 5 |
+
|
| 6 |
+
class LLMFactory:
    """Factory for creating LLM instances.

    Provides process-wide singletons per provider via get_llm(), and fresh,
    independently-configured instances via create_new_llm().
    """

    # Lazily-created shared instances, one per provider.
    # NOTE(review): lazy init is not thread-safe; fine for single-threaded
    # startup — confirm if used from multiple threads.
    _groq_instance = None
    _openai_instance = None

    @classmethod
    def get_llm(cls, provider: Literal["groq", "openai"] = "groq") -> Union[ChatGroq, ChatOpenAI]:
        """Get or create LLM instance (singleton pattern).

        Raises:
            ValueError: If *provider* is not "groq" or "openai".
        """
        if provider == "groq":
            if cls._groq_instance is None:
                cls._groq_instance = ChatGroq(
                    model=settings.GROQ_MODEL,
                    temperature=settings.GROQ_TEMPERATURE,
                    groq_api_key=settings.GROQ_API_KEY
                )
            return cls._groq_instance
        elif provider == "openai":
            if cls._openai_instance is None:
                cls._openai_instance = ChatOpenAI(
                    model=settings.OPENAI_MODEL,
                    temperature=settings.OPENAI_TEMPERATURE,
                    openai_api_key=settings.OPENAI_API_KEY
                )
            return cls._openai_instance
        else:
            raise ValueError(f"Unsupported provider: {provider}")

    @classmethod
    def create_new_llm(cls, provider: Literal["groq", "openai"] = "groq", **kwargs) -> Union[ChatGroq, ChatOpenAI]:
        """Create a new LLM instance with custom parameters.

        "model", "temperature", and "api_key" override the settings defaults;
        unlike the singleton from get_llm(), the result is not cached.
        """
        if provider == "groq":
            return ChatGroq(
                model=kwargs.get("model", settings.GROQ_MODEL),
                temperature=kwargs.get("temperature", settings.GROQ_TEMPERATURE),
                groq_api_key=kwargs.get("api_key", settings.GROQ_API_KEY)
            )
        elif provider == "openai":
            return ChatOpenAI(
                model=kwargs.get("model", settings.OPENAI_MODEL),
                temperature=kwargs.get("temperature", settings.OPENAI_TEMPERATURE),
                openai_api_key=kwargs.get("api_key", settings.OPENAI_API_KEY)
            )
        else:
            raise ValueError(f"Unsupported provider: {provider}")

    @classmethod
    def get_groq_llm(cls) -> ChatGroq:
        """Convenience method to get Groq LLM"""
        return cls.get_llm("groq")

    @classmethod
    def get_openai_llm(cls) -> ChatOpenAI:
        """Convenience method to get OpenAI LLM"""
        return cls.get_llm("openai")

    @classmethod
    def reset_instances(cls):
        """Reset singleton instances (useful for testing)"""
        cls._groq_instance = None
        cls._openai_instance = None
|
src/agenticRAG/components/search_tools.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_tavily import TavilySearch
|
| 2 |
+
from langchain_community.utilities import GoogleSerperAPIWrapper
|
| 3 |
+
from langchain_community.tools import GoogleSerperRun
|
| 4 |
+
from src.config.settings import settings
|
| 5 |
+
from typing import Union, Literal
|
| 6 |
+
|
| 7 |
+
class SearchToolFactory:
    """Factory for creating Tavily / Serper web-search tools.

    ``get_search_tool`` hands out process-wide singletons built from settings;
    ``create_new_search_tool`` always builds a fresh instance and lets callers
    override individual constructor parameters via keyword arguments.
    """

    _tavily_instance = None
    _serper_instance = None

    @classmethod
    def get_search_tool(cls, provider: Literal["tavily", "serper"] = "tavily") -> Union[TavilySearch, GoogleSerperRun]:
        """Get or create a search tool instance (singleton pattern)."""
        if provider == "tavily":
            # Lazily build the shared Tavily client on first use.
            if cls._tavily_instance is None:
                cls._tavily_instance = TavilySearch(api_key=settings.TAVILY_API_KEY)
            return cls._tavily_instance
        if provider == "serper":
            # Serper needs an API wrapper object wrapped in a Run tool.
            if cls._serper_instance is None:
                wrapper = GoogleSerperAPIWrapper(serper_api_key=settings.SERPER_API_KEY)
                cls._serper_instance = GoogleSerperRun(api_wrapper=wrapper)
            return cls._serper_instance
        raise ValueError(f"Unsupported provider: {provider}")

    @classmethod
    def create_new_search_tool(cls, provider: Literal["tavily", "serper"] = "tavily", **kwargs) -> Union[TavilySearch, GoogleSerperRun]:
        """Create a new search tool instance with custom parameters.

        Known constructor arguments fall back to settings when not supplied;
        any remaining keyword arguments are forwarded untouched.
        """
        if provider == "tavily":
            reserved = ("api_key", "max_results", "search_depth", "include_answer", "include_raw_content")
            extra = {key: value for key, value in kwargs.items() if key not in reserved}
            return TavilySearch(
                api_key=kwargs.get("api_key", settings.TAVILY_API_KEY),
                max_results=kwargs.get("max_results", settings.SEARCH_RESULTS_COUNT),
                search_depth=kwargs.get("search_depth", settings.TAVILY_SEARCH_DEPTH),
                include_answer=kwargs.get("include_answer", settings.TAVILY_INCLUDE_ANSWER),
                include_raw_content=kwargs.get("include_raw_content", settings.TAVILY_INCLUDE_RAW_CONTENT),
                **extra,
            )
        if provider == "serper":
            reserved = ("api_key", "k", "type", "country", "location")
            extra = {key: value for key, value in kwargs.items() if key not in reserved}
            wrapper = GoogleSerperAPIWrapper(
                serper_api_key=kwargs.get("api_key", settings.SERPER_API_KEY),
                k=kwargs.get("k", settings.SEARCH_RESULTS_COUNT),
                type=kwargs.get("type", settings.SERPER_SEARCH_TYPE),
                country=kwargs.get("country", settings.SERPER_COUNTRY),
                location=kwargs.get("location", settings.SERPER_LOCATION),
                **extra,
            )
            return GoogleSerperRun(api_wrapper=wrapper)
        raise ValueError(f"Unsupported provider: {provider}")

    @classmethod
    def get_tavily_search(cls) -> TavilySearch:
        """Convenience method to get the shared Tavily search tool."""
        return cls.get_search_tool("tavily")

    @classmethod
    def get_serper_search(cls) -> GoogleSerperRun:
        """Convenience method to get the shared Serper search tool."""
        return cls.get_search_tool("serper")

    @classmethod
    def reset_instances(cls):
        """Reset singleton instances (useful for testing)."""
        cls._tavily_instance = None
        cls._serper_instance = None
|
src/agenticRAG/components/vectorstore.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.vectorstores import FAISS
|
| 2 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 3 |
+
from typing import List, Optional
|
| 4 |
+
from src.config.settings import settings
|
| 5 |
+
from src.agenticRAG.components.embeddings import EmbeddingFactory
|
| 6 |
+
import os
|
| 7 |
+
from typing import Dict, Any, List, Optional
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from src.agenticRAG.components.document_parsing import DocumentChunker
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class VectorStoreManager:
    """Manager for FAISS vector store operations (load, search, add, save)."""

    def __init__(self):
        # Shared embedding model used both for indexing and for queries.
        self.embeddings = EmbeddingFactory.get_embeddings()
        self.vectorstore = None

    def load_vectorstore(self, path: Optional[str] = None) -> bool:
        """Load a vector store from *path* (or the configured default).

        Returns:
            bool: True when an index was found and loaded, False otherwise.
        """
        try:
            target = path or settings.VECTORSTORE_PATH
            if not os.path.exists(target):
                return False
            # allow_dangerous_deserialization is required by FAISS.load_local
            # because the index metadata is pickled — only load trusted files.
            self.vectorstore = FAISS.load_local(
                target, self.embeddings, allow_dangerous_deserialization=True
            )
            return True
        except Exception as e:
            print(f"Error loading vectorstore: {e}")
            return False

    def search_documents(self, query: str, k: int = 3) -> List[str]:
        """Return the page contents of the *k* most similar documents."""
        if not self.vectorstore:
            # No index loaded yet — nothing to search.
            return []

        try:
            matches = self.vectorstore.similarity_search(query, k=k)
            return [match.page_content for match in matches]
        except Exception as e:
            print(f"Error searching documents: {e}")
            return []

    def add_documents(self, texts: List[str], metadatas: Optional[List[dict]] = None):
        """Add texts (with optional per-text metadata) to the vector store."""
        if not self.vectorstore:
            # First batch: build a brand-new index from the texts.
            self.vectorstore = FAISS.from_texts(texts, self.embeddings, metadatas=metadatas)
        else:
            self.vectorstore.add_texts(texts, metadatas=metadatas)

    def save_vectorstore(self, path: Optional[str] = None):
        """Persist the vector store to *path* (no-op when nothing is loaded)."""
        if self.vectorstore:
            self.vectorstore.save_local(path or settings.VECTORSTORE_PATH)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def store_documents_in_vectorstore(
    file_paths: List[str],
    vectorstore_manager: Optional[VectorStoreManager] = None,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    save_path: Optional[str] = None,
    include_metadata: bool = True
) -> Dict[str, Any]:
    """
    Process documents and store them in the vector store.

    Chunks each file with DocumentChunker, appends the chunks (plus optional
    per-chunk source metadata) to the manager's index, and persists the index
    once at the end. Per-file failures are recorded in the result, not raised.

    Args:
        file_paths (List[str]): List of file paths to process
        vectorstore_manager (VectorStoreManager, optional): Existing manager instance
        chunk_size (int): Size of each chunk
        chunk_overlap (int): Overlap between chunks
        save_path (str, optional): Path to load from / save the vector store to
        include_metadata (bool): Whether to include file metadata per chunk

    Returns:
        Dict[str, Any]: Processing statistics (total_files, processed_files,
        failed_files, total_chunks, chunks_by_file; plus "error" on a fatal error).
    """
    # Initialize components
    if vectorstore_manager is None:
        vectorstore_manager = VectorStoreManager()

    chunker = DocumentChunker(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    # Load existing vectorstore if available (best effort — a missing store is fine,
    # add_documents will create a fresh index in that case)
    vectorstore_manager.load_vectorstore(save_path)

    # Track processing statistics
    results = {
        "total_files": len(file_paths),
        "processed_files": 0,
        "failed_files": [],
        "total_chunks": 0,
        "chunks_by_file": {}
    }

    try:
        for file_path in file_paths:
            try:
                print(f"Processing file: {file_path}")

                # Process file into chunks
                chunks = chunker.process_file(file_path)

                if chunks:
                    # Prepare metadata if requested: one dict per chunk so each
                    # chunk can be traced back to its source file and position.
                    metadatas = None
                    if include_metadata:
                        file_name = Path(file_path).name
                        file_extension = Path(file_path).suffix
                        metadatas = [
                            {
                                "source": file_path,
                                "file_name": file_name,
                                "file_extension": file_extension,
                                "chunk_index": i
                            }
                            for i in range(len(chunks))
                        ]

                    # Add documents to vector store
                    vectorstore_manager.add_documents(chunks, metadatas)

                    # Update statistics
                    results["processed_files"] += 1
                    results["total_chunks"] += len(chunks)
                    results["chunks_by_file"][file_path] = len(chunks)

                    print(f"Successfully processed {file_path}: {len(chunks)} chunks")

                else:
                    # Empty extraction counts as a failure for reporting purposes.
                    print(f"No chunks extracted from {file_path}")
                    results["failed_files"].append(file_path)

            except Exception as e:
                # Per-file errors are swallowed so one bad file doesn't abort the batch.
                print(f"Error processing file {file_path}: {e}")
                results["failed_files"].append(file_path)

        # Save the vector store (only when something was actually indexed)
        if results["total_chunks"] > 0:
            vectorstore_manager.save_vectorstore(save_path)
            print(f"Vector store saved with {results['total_chunks']} total chunks")

        return results

    except Exception as e:
        # Fatal error outside the per-file loop — return partial stats with the error.
        print(f"Error in store_documents_in_vectorstore: {e}")
        results["error"] = str(e)
        return results
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def store_single_document_in_vectorstore(
    file_path: str,
    vectorstore_manager: Optional[VectorStoreManager] = None,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    save_path: Optional[str] = None
) -> bool:
    """
    Process and store a single document in the vector store.

    Thin wrapper over store_documents_in_vectorstore for the one-file case.

    Args:
        file_path (str): Path to the file to process
        vectorstore_manager (VectorStoreManager, optional): Existing manager instance
        chunk_size (int): Size of each chunk
        chunk_overlap (int): Overlap between chunks
        save_path (str, optional): Path to save the vector store

    Returns:
        bool: True when the file produced at least one stored chunk
    """
    outcome = store_documents_in_vectorstore(
        file_paths=[file_path],
        vectorstore_manager=vectorstore_manager,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        save_path=save_path,
    )
    return outcome["processed_files"] > 0
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def batch_store_documents(
    directory_path: str,
    file_extensions: Optional[List[str]] = None,
    vectorstore_manager: Optional[VectorStoreManager] = None,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    save_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Process and store all documents from a directory.

    Args:
        directory_path (str): Path to directory containing documents
        file_extensions (List[str], optional): File extensions to process.
            Defaults to [".pdf", ".docx", ".txt", ".md"].
        vectorstore_manager (VectorStoreManager, optional): Existing manager instance
        chunk_size (int): Size of each chunk
        chunk_overlap (int): Overlap between chunks
        save_path (str, optional): Path to save the vector store

    Returns:
        Dict[str, Any]: Processing results (same shape as
        store_documents_in_vectorstore)
    """
    # None-sentinel instead of a mutable list default (shared across calls).
    if file_extensions is None:
        file_extensions = [".pdf", ".docx", ".txt", ".md"]

    # Find all files with the specified extensions.
    # NOTE: glob("*{ext}") only matches the top level, not subdirectories,
    # and is case-sensitive on case-sensitive filesystems.
    directory = Path(directory_path)
    file_paths = []

    for extension in file_extensions:
        file_paths.extend(directory.glob(f"*{extension}"))

    # Convert Path objects to string paths for downstream processing
    file_paths = [str(path) for path in file_paths]

    if not file_paths:
        print(f"No files found in {directory_path} with extensions {file_extensions}")
        # Keep the result shape consistent with store_documents_in_vectorstore.
        return {"total_files": 0, "processed_files": 0, "failed_files": [], "total_chunks": 0, "chunks_by_file": {}}

    print(f"Found {len(file_paths)} files to process")

    return store_documents_in_vectorstore(
        file_paths=file_paths,
        vectorstore_manager=vectorstore_manager,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        save_path=save_path
    )
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
# Example usage
|
| 233 |
+
def main():
    """Demo: index a single PDF into the vector store, then run a sample query."""

    # Shared manager so the search below hits the freshly built index.
    vs_manager = VectorStoreManager()

    # --- Index a single document ---
    print("=== Storing Single Document ===")
    file_path = "/home/ubuntu/OMANI-Therapist-Voice-ChatBot/KnowledgebaseFile/SuicideGuard_An_NLP-Based_Chrome_Extension_for_Detecting_Suicidal_Thoughts_in_Bengali.pdf"
    stored = store_single_document_in_vectorstore(
        file_path=file_path,
        vectorstore_manager=vs_manager,
        chunk_size=1000,
        chunk_overlap=150
    )
    print(f"Single document processing: {'Success' if stored else 'Failed'}")

    # See store_documents_in_vectorstore for multi-file indexing and
    # batch_store_documents for whole-directory indexing variants.

    # --- Query the vector store ---
    print("\n=== Searching Vector Store ===")
    query = "suicide prevention techniques"
    search_results = vs_manager.search_documents(query, k=3)

    print(f"Search results for '{query}':")
    for i, result in enumerate(search_results):
        print(f" Result {i+1}: {result[:200]}...")


if __name__ == "__main__":
    main()
|
src/agenticRAG/gpt.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openai import OpenAI
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
from typing import Dict, List, Optional, Tuple
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
import os
|
| 7 |
+
from enum import Enum
|
| 8 |
+
from loguru import logger
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
load_dotenv()
|
| 11 |
+
|
| 12 |
+
class EmotionalState(Enum):
    """Coarse emotional categories inferred via keyword matching on user input."""
    CALM = "calm"
    ANXIOUS = "anxious"
    DEPRESSED = "depressed"
    ANGRY = "angry"
    DISTRESSED = "distressed"
|
| 18 |
+
|
| 19 |
+
class OmaniTherapistAI:
    """Bilingual (Omani Arabic / English) therapeutic chat assistant backed by OpenAI.

    Keeps an in-memory conversation history per instance, detects the user's
    language and a coarse emotional state via keyword matching, and generates
    culturally-aware responses through the OpenAI Responses API.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the OMANI Therapist AI system.

        Args:
            api_key: OpenAI API key (if not provided, will use the
                OPENAI_API_KEY environment variable)

        Raises:
            ValueError: If no API key is supplied and none is in the environment.
        """
        self.api_key = api_key or os.getenv('OPENAI_API_KEY')
        if not self.api_key:
            raise ValueError("OpenAI API key is required")

        self.client = OpenAI(api_key=self.api_key)

        # Session management (per-instance, in-memory only)
        self.conversation_history = []          # list of {"role": ..., "content": ...}
        self.user_profile = {}                  # currently unused placeholder
        self.emotional_state = EmotionalState.CALM

        # System prompt for therapeutic conversations
        self.system_prompt = self._create_system_prompt()

    def _create_system_prompt(self) -> str:
        """Create the comprehensive system prompt for bilingual therapeutic conversations."""
        return """You are a specialized mental health counselor for the Omani community. You are fluent in both Arabic (Omani dialect) and English, and you understand Gulf culture and Islamic values deeply.

## Your Identity & Characteristics:
- Omani Mental Health Counselor
- Bilingual: Fluent in Omani Arabic and English
- Culturally competent in Gulf and Islamic traditions
- Understand family dynamics and Gulf society
- Integrate Islamic concepts in therapy when appropriate
- Handle code-switching naturally between Arabic and English

## Your Therapeutic Skills:
- Cognitive Behavioral Therapy (CBT) adapted for Omani culture
- Active listening and empathy
- Anxiety and stress management techniques
- Family and relationship therapy
- Trauma-informed approaches
- Spiritual therapy compatible with Islam

## Language Guidelines:
**CRITICAL: Always respond in the SAME language the user uses:**
- If user writes in Arabic → respond in Omani Arabic
- If user writes in English → respond in English
- If user mixes languages → mirror their code-switching pattern
- Maintain cultural sensitivity in both languages

## Response Instructions:
- Start with warm greeting and check emotional state
- Ask open-ended questions to understand situation
- Use reframing and summarization techniques
- Offer practical coping strategies
- End with summary and follow-up suggestions
- Keep responses 100-200 words
- Show empathy and understanding

## Cultural Sensitivity:
- Respect Islamic values and Omani traditions
- Avoid taboo or controversial topics
- Consider family/community role in mental health
- Use religious references wisely when appropriate
- Address mental health stigma sensitively

Remember: You are a supportive assistant, not a replacement for professional specialized therapy.
"""

    def detect_language(self, text: str) -> str:
        """
        Detect whether text is primarily Arabic or English.

        Args:
            text: Input text to analyze

        Returns:
            'arabic', 'english', or 'mixed'
        """
        # Count Arabic vs English characters. U+0600–U+06FF is the main
        # Arabic Unicode block; ASCII alphabetic characters count as English.
        arabic_chars = sum(1 for char in text if '\u0600' <= char <= '\u06FF')
        english_chars = sum(1 for char in text if char.isalpha() and char.isascii())

        if arabic_chars > english_chars:
            return 'arabic'
        elif english_chars > arabic_chars:
            return 'english'
        else:
            # Ties — including text with no letters at all — are reported as mixed.
            return 'mixed'

    def analyze_emotional_state(self, user_input: str) -> Tuple[EmotionalState, str]:
        """
        Analyze the user's emotional state from their input via keyword matching.

        Precedence when multiple categories match: anxiety > depression >
        anger > stress; no match yields CALM.

        Args:
            user_input: User's message in Arabic or English

        Returns:
            Tuple of (emotional_state, detected_language)
        """
        user_input_lower = user_input.lower()
        detected_language = self.detect_language(user_input)

        # Emotional state analysis using substring keyword matching in both
        # languages. NOTE(review): substring matching can over-trigger
        # (e.g. 'mad' inside 'made') — acceptable for coarse classification.
        anxiety_keywords = [
            # Arabic
            'قلق', 'خوف', 'توتر', 'قلقان', 'مضطرب', 'خايف', 'متوتر', 'مهموم',
            'أشعر بالقلق', 'أخاف', 'عندي قلق', 'مش مرتاح', 'مو مرتاح',
            # English
            'anxiety', 'worried', 'nervous', 'anxious', 'panic', 'scared', 'fearful',
            'feel anxious', 'feeling worried', 'i\'m scared', 'i\'m nervous'
        ]

        depression_keywords = [
            # Arabic
            'حزن', 'اكتئاب', 'مكتئب', 'حزين', 'يائس', 'زعلان', 'مش راضي',
            'أشعر بالحزن', 'مو مبسوط', 'تعبان نفسياً', 'مش عارف شنو أسوي',
            # English
            'depressed', 'sad', 'hopeless', 'down', 'blue', 'miserable', 'unhappy',
            'feeling down', 'feel sad', 'i\'m depressed', 'feeling hopeless'
        ]

        anger_keywords = [
            # Arabic
            'غضب', 'غاضب', 'زعلان', 'مستاء', 'عصبي', 'متضايق', 'مش راضي',
            'أشعر بالغضب', 'مزعوج', 'معصب', 'متنرفز',
            # English
            'angry', 'mad', 'frustrated', 'irritated', 'annoyed', 'upset', 'furious',
            'feel angry', 'i\'m mad', 'feeling frustrated', 'really upset'
        ]

        stress_keywords = [
            # Arabic
            'ضغط', 'ضغوط', 'تعب', 'مرهق', 'تعبان', 'مش قادر', 'صعب عليّ',
            'أشعر بالضغط', 'مرهق نفسياً', 'ما أقدر أكمل',
            # English
            'stress', 'stressed', 'pressure', 'overwhelmed', 'exhausted', 'burned out',
            'feeling stressed', 'under pressure', 'can\'t cope', 'too much pressure'
        ]

        if any(keyword in user_input_lower for keyword in anxiety_keywords):
            return EmotionalState.ANXIOUS, detected_language
        elif any(keyword in user_input_lower for keyword in depression_keywords):
            return EmotionalState.DEPRESSED, detected_language
        elif any(keyword in user_input_lower for keyword in anger_keywords):
            return EmotionalState.ANGRY, detected_language
        elif any(keyword in user_input_lower for keyword in stress_keywords):
            return EmotionalState.DISTRESSED, detected_language

        return EmotionalState.CALM, detected_language

    def generate_therapeutic_response(self, user_input: str, include_history: bool = True) -> Dict:
        """
        Generate a therapeutic response using the OpenAI API.

        Also updates self.emotional_state and appends the exchange to
        self.conversation_history (trimmed to the last 10 messages).

        Args:
            user_input: User's message
            include_history: Whether to include recent conversation history

        Returns:
            Dictionary with keys: response, emotional_state, detected_language,
            timestamp — plus "error" when generation failed.
        """
        try:
            # Analyze emotional state and detect language
            emotional_state, detected_language = self.analyze_emotional_state(user_input)
            self.emotional_state = emotional_state

            # Prepare messages for the API, system prompt first
            messages = [{"role": "system", "content": self.system_prompt}]

            # Add language context to the system prompt
            language_instruction = f"\n\nIMPORTANT: The user is communicating in {detected_language}. Please respond in the same language they used."
            messages[0]["content"] += language_instruction

            # Add conversation history if requested
            if include_history and self.conversation_history:
                messages.extend(self.conversation_history[-6:])  # Last 6 messages for context

            # Add current user message
            messages.append({"role": "user", "content": user_input})

            # Generate response using OpenAI.
            # NOTE(review): uses the OpenAI Responses API (client.responses.create /
            # response.output_text) — requires a recent openai SDK; confirm the
            # pinned model name is still available.
            response = self.client.responses.create(
                model="gpt-4.1-nano-2025-04-14",
                input=messages,
                temperature=0.7,
            )
            logger.info(f"Generated response: {response.output_text}")

            ai_response = (response.output_text)

            # Update conversation history
            self.conversation_history.append({"role": "user", "content": user_input})
            self.conversation_history.append({"role": "assistant", "content": ai_response})

            # Keep only last 10 messages to manage context length
            if len(self.conversation_history) > 10:
                self.conversation_history = self.conversation_history[-10:]

            return {
                "response": ai_response,
                "emotional_state": emotional_state.value,
                "detected_language": detected_language,
                "timestamp": datetime.now().isoformat(),
            }

        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")

            # Fall back to a canned error message in the detected language
            # (Arabic message for both 'arabic' and 'mixed' inputs).
            detected_language = self.detect_language(user_input)

            if detected_language == 'english':
                error_message = "Sorry, a technical error occurred. Please try again or contact a specialist."
            else:
                error_message = "آسف، حدث خطأ تقني. يرجى المحاولة مرة أخرى أو التواصل مع المختص."

            return {
                "response": error_message,
                "emotional_state": "unknown",
                "detected_language": detected_language,
                "timestamp": datetime.now().isoformat(),
                "error": str(e)
            }

    def get_conversation_summary(self) -> Dict:
        """Get a summary of the current conversation session."""
        return {
            "total_messages": len(self.conversation_history),
            "current_emotional_state": self.emotional_state.value,
            # NOTE(review): history entries only carry "role"/"content", so this
            # .get("timestamp") is always None — confirm whether timestamps
            # should be stored per message.
            "session_start": self.conversation_history[0].get("timestamp") if self.conversation_history else None,
            "last_interaction": datetime.now().isoformat()
        }

    def clear_conversation(self):
        """Clear conversation history and reset the emotional state."""
        self.conversation_history = []
        self.emotional_state = EmotionalState.CALM
        logger.info("Conversation cleared")

    def export_conversation(self, filename: Optional[str] = None) -> str:
        """Export the conversation to a JSON file.

        Args:
            filename: Target file name; defaults to a timestamped name.

        Returns:
            The file name the session was written to.
        """
        if not filename:
            filename = f"therapy_session_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        session_data = {
            "session_metadata": self.get_conversation_summary(),
            "conversation_history": self.conversation_history,
            "export_timestamp": datetime.now().isoformat()
        }

        # ensure_ascii=False keeps the Arabic text readable in the exported file.
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(session_data, f, ensure_ascii=False, indent=2)

        return filename
|
| 273 |
+
|
| 274 |
+
# Helper function for easy integration
def get_therapy_response(user_input: str, api_key: str = None) -> Dict:
    """Create a one-off OmaniTherapistAI session and return its reply.

    Note: a fresh therapist (with empty history) is built per call.

    Args:
        user_input: User's message
        api_key: OpenAI API key

    Returns:
        Dictionary with response and metadata
    """
    return OmaniTherapistAI(api_key).generate_therapeutic_response(user_input)
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def gpt_response(query):
    """Answer *query* with a fresh therapist instance, echoing the
    response, detected emotional state and language to stdout."""
    result = OmaniTherapistAI().generate_therapeutic_response(query)
    for label, key in (
        ("AI Response", "response"),
        ("Emotional State", "emotional_state"),
        ("Detected Language", "detected_language"),
    ):
        print(f"{label}: {result[key]}")
    return result
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
# Example usage and testing
|
| 300 |
+
if __name__ == "__main__":
    # Smoke-test the system across Arabic, English and code-switching inputs.
    therapist = OmaniTherapistAI()

    test_scenarios = [
        # Arabic scenarios
        "السلام عليكم، أشعر بالقلق الشديد هذه الأيام",
        "أواجه مشاكل في العمل وأشعر بالضغط",
        "لا أستطيع النوم جيداً ومزاجي متقلب",
        "أريد أن أتحدث عن مشاكلي مع زوجتي",
        "أشعر بالاكتئاب ولا أعرف ماذا أفعل",

        # English scenarios
        "Hello, I'm feeling very anxious these days",
        "I'm having problems at work and feeling stressed",
        "I can't sleep well and my mood is unstable",
        "I want to talk about my problems with my wife",
        "I feel depressed and don't know what to do",

        # Code-switching scenarios
        "السلام عليكم، I'm feeling very stressed lately",
        "Hello, أشعر بالقلق and I don't know what to do",
        "My work is مرهق جداً and I can't cope"
    ]

    print("=== OMANI Therapist AI Test ===")
    for i, scenario in enumerate(test_scenarios, 1):
        print(f"\n--- Test Scenario {i} ---")
        print(f"User: {scenario}")

        result = therapist.generate_therapeutic_response(scenario)
        for label, key in (
            ("AI Response", "response"),
            ("Emotional State", "emotional_state"),
            ("Detected Language", "detected_language"),
        ):
            print(f"{label}: {result[key]}")
        print("-" * 50)

    # Session-level recap after all scenarios ran.
    print("\n=== Session Summary ===")
    print(json.dumps(therapist.get_conversation_summary(), indent=2, ensure_ascii=False))
|
src/agenticRAG/graph/__init__.py
ADDED
|
File without changes
|
src/agenticRAG/graph/builder.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langgraph.graph import StateGraph, END
|
| 2 |
+
from src.agenticRAG.models.state import AgentState
|
| 3 |
+
from src.agenticRAG.nodes.query_upgrader import query_upgrader_node
|
| 4 |
+
from src.agenticRAG.nodes.query_router import query_router_node
|
| 5 |
+
from src.agenticRAG.nodes.rag_node import rag_node
|
| 6 |
+
from src.agenticRAG.nodes.web_search_node import web_search_node
|
| 7 |
+
from src.agenticRAG.nodes.direct_llm_node import direct_llm_node
|
| 8 |
+
from src.agenticRAG.graph.router import route_query
|
| 9 |
+
|
| 10 |
+
class GraphBuilder:
    """Assembles the AgenticRAG LangGraph workflow."""

    @staticmethod
    def create_graph():
        """Build and compile the routing workflow.

        The graph runs the query upgrader, then the router, which fans
        out to exactly one of three answer paths (RAG, web search,
        direct LLM); every path terminates the run.
        """
        workflow = StateGraph(AgentState)

        # Register all processing nodes (order matches original wiring).
        for node_name, node_fn in (
            ("query_upgrader", query_upgrader_node),
            ("query_router", query_router_node),
            ("rag_path", rag_node),
            ("web_search", web_search_node),
            ("direct_llm", direct_llm_node),
        ):
            workflow.add_node(node_name, node_fn)

        # Linear prefix: always upgrade first, then decide the route.
        workflow.set_entry_point("query_upgrader")
        workflow.add_edge("query_upgrader", "query_router")

        # The router's decision selects exactly one terminal path.
        terminal_nodes = ("rag_path", "web_search", "direct_llm")
        workflow.add_conditional_edges(
            "query_router",
            route_query,
            {name: name for name in terminal_nodes},
        )
        for terminal in terminal_nodes:
            workflow.add_edge(terminal, END)

        return workflow.compile()
|
src/agenticRAG/graph/router.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Literal
|
| 2 |
+
from src.agenticRAG.models.state import AgentState
|
| 3 |
+
|
| 4 |
+
def route_query(state: AgentState) -> Literal["rag_path", "web_search", "direct_llm"]:
    """Map the router's decision onto a graph edge name.

    Unknown or missing decisions fall back to the direct-LLM path,
    matching the original dict-lookup default.
    """
    decision = state.route_decision
    if decision == "RAG":
        return "rag_path"
    if decision == "WEB":
        return "web_search"
    # "DIRECT" and anything unexpected both land here.
    return "direct_llm"
|
src/agenticRAG/main.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
from typing import List
|
| 3 |
+
from src.config.settings import settings
|
| 4 |
+
from src.agenticRAG.models.state import AgentState
|
| 5 |
+
from src.agenticRAG.models.schemas import QueryRequest, QueryResponse
|
| 6 |
+
from src.agenticRAG.graph.builder import GraphBuilder
|
| 7 |
+
from loguru import logger
|
| 8 |
+
|
| 9 |
+
class AgenticRAGSystem:
    """Main AgenticRAG system.

    Validates configuration and compiles the LangGraph workflow once at
    construction; queries are then run through ``process_query``.
    """

    def __init__(self):
        # Fail fast if required API keys (e.g. GROQ_API_KEY) are missing.
        settings.validate()

        # Compile the workflow once per system instance.
        self.app = GraphBuilder.create_graph()

        logger.info("AgenticRAG system initialized successfully")

    def process_query(self, query: str) -> QueryResponse:
        """Run one query through the graph and package the result.

        Args:
            query: Raw user question.

        Returns:
            QueryResponse carrying the upgraded query, the route taken,
            the final answer, per-node metadata, and wall-clock time.

        Raises:
            Exception: re-raises whatever the graph run failed with,
                after logging it.
        """

        start_time = time.time()

        try:
            # Initialize state
            initial_state = AgentState(user_query=query)

            # Run the graph.
            # NOTE(review): recent LangGraph versions return a plain dict
            # from ``invoke`` rather than the state model; the attribute
            # accesses below assume a model-like object — confirm against
            # the pinned langgraph version.
            final_state = self.app.invoke(initial_state)

            # Calculate processing time
            processing_time = time.time() - start_time

            # Create response
            response = QueryResponse(
                query=final_state.user_query,
                upgraded_query=final_state.upgraded_query,
                route_taken=final_state.route_decision,
                response=final_state.final_response,
                metadata=final_state.metadata,
                processing_time=processing_time
            )

            logger.info(f"Query processed successfully in {processing_time:.2f}s")
            return response

        except Exception as e:
            logger.error(f"Error processing query: {e}")
            raise

    def process_batch(self, queries: List[str]) -> List[QueryResponse]:
        """Process multiple queries sequentially.

        Failed queries are logged and skipped, so the returned list may
        be shorter than the input list.
        """

        responses = []
        for query in queries:
            try:
                response = self.process_query(query)
                responses.append(response)
            except Exception as e:
                logger.error(f"Error processing query '{query}': {e}")

        return responses
|
| 65 |
+
|
| 66 |
+
def agenticRAGResponse(query: str) -> QueryResponse:
    """Convenience wrapper: build a fresh system and answer one query.

    NOTE: constructs (and compiles) a new graph per call; reuse a single
    AgenticRAGSystem instance when answering many queries.
    """
    return AgenticRAGSystem().process_query(query)
|
| 71 |
+
|
| 72 |
+
def main():
    """Smoke-test the system against one query per routing path."""

    system = AgenticRAGSystem()

    # One representative query per expected route (RAG / WEB / DIRECT).
    test_queries = [
        "What is machine learning?",
        "Latest news about AI",
        "Write a poem about spring"
    ]

    separator = "=" * 50
    for query in test_queries:
        print(f"\n{separator}")
        print(f"Query: {query}")
        print(separator)

        try:
            result = system.process_query(query)

            print(f"Original Query: {result.query}")
            print(f"Upgraded Query: {result.upgraded_query}")
            print(f"Route Taken: {result.route_taken}")
            print(f"Response: {result.response}")
            print(f"Processing Time: {result.processing_time:.2f}s")
            print(f"Metadata: {result.metadata}")

        except Exception as e:
            print(f"Error: {e}")

if __name__ == "__main__":
    main()
|
src/agenticRAG/models/__init__.py
ADDED
|
File without changes
|
src/agenticRAG/models/schemas.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Any, Optional
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
|
| 4 |
+
class QueryRequest(BaseModel):
    """Request schema for query processing"""
    query: str  # raw user question
    session_id: Optional[str] = None  # optional conversation/session identifier
    metadata: Optional[Dict[str, Any]] = None  # caller-supplied extras

class QueryResponse(BaseModel):
    """Response schema for query processing"""
    query: str  # original user question
    upgraded_query: str  # enhanced form produced by the query upgrader
    route_taken: str  # routing decision, e.g. "RAG", "WEB" or "DIRECT"
    response: str  # final answer text
    metadata: Dict[str, Any]  # per-node success flags and error details
    processing_time: float  # wall-clock seconds for the whole run

class ProcessingMetadata(BaseModel):
    """Metadata for processing steps"""
    upgrade_success: bool = False
    routing_success: bool = False
    path_success: bool = False
    # NOTE: pydantic copies mutable defaults per instance, so sharing a
    # list literal here is safe (unlike on a plain class attribute).
    errors: List[str] = []
    processing_time: float = 0.0
|
src/agenticRAG/models/state.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, List, Any
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
|
| 4 |
+
class AgentState(BaseModel):
    """State schema for the AgenticRAG workflow.

    A single AgentState flows through every graph node; each node fills
    in its own fields and records success/error flags in ``metadata``.
    """

    user_query: str = Field(description="Original user query")
    upgraded_query: str = Field(default="", description="Enhanced query")
    route_decision: str = Field(default="", description="Routing decision")
    retrieved_docs: List[str] = Field(default_factory=list, description="Retrieved documents")
    search_results: List[str] = Field(default_factory=list, description="Web search results")
    final_response: str = Field(default="", description="Final response")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")

    class Config:
        """Pydantic configuration"""
        # Allow non-pydantic objects (e.g. framework types) in fields.
        arbitrary_types_allowed = True
|
src/agenticRAG/nodes/__init__.py
ADDED
|
File without changes
|
src/agenticRAG/nodes/direct_llm_node.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.agenticRAG.models.state import AgentState
|
| 2 |
+
from src.agenticRAG.components.llm_factory import LLMFactory
|
| 3 |
+
from src.agenticRAG.prompt.prompts import Prompts
|
| 4 |
+
|
| 5 |
+
class DirectLLMNode:
    """Answers a query straight from the LLM, without retrieval."""

    def __init__(self):
        self.llm = LLMFactory.get_llm()
        self.prompt = Prompts.DIRECT_RESPONSE

    def process_direct_llm(self, state: AgentState) -> AgentState:
        """Generate a direct answer for the upgraded query.

        On failure the node degrades gracefully: a canned apology goes
        into the state and the error is recorded in metadata, so the
        graph run never aborts on this node.
        """
        try:
            result = (self.prompt | self.llm).invoke({"query": state.upgraded_query})
        except Exception as e:
            state.final_response = "Sorry, I couldn't process your request at the moment."
            state.metadata["direct_llm_success"] = False
            state.metadata["direct_llm_error"] = str(e)
        else:
            state.final_response = result.content
            state.metadata["direct_llm_success"] = True

        return state
|
| 28 |
+
|
| 29 |
+
# Node function for LangGraph
|
| 30 |
+
def direct_llm_node(state: AgentState) -> AgentState:
    """LangGraph entry point for the direct-LLM path."""
    # A fresh node (and LLM client) is built per invocation.
    return DirectLLMNode().process_direct_llm(state)
|
src/agenticRAG/nodes/query_router.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Literal
|
| 2 |
+
from src.agenticRAG.models.state import AgentState
|
| 3 |
+
from src.agenticRAG.components.llm_factory import LLMFactory
|
| 4 |
+
from src.agenticRAG.prompt.prompts import Prompts
|
| 5 |
+
from src.config.settings import settings
|
| 6 |
+
|
| 7 |
+
class QueryRouter:
    """Node that decides which answer path (RAG/WEB/DIRECT) a query takes."""

    def __init__(self):
        self.llm = LLMFactory.get_llm()
        # BUG FIX: ``Prompts`` exposes the router prompt via the
        # ``query_router()`` classmethod (which injects the current
        # knowledge-base description); the former ``Prompts.QUERY_ROUTER``
        # attribute is no longer defined and raised AttributeError here.
        self.prompt = Prompts.query_router()

    def route_query(self, state: AgentState) -> AgentState:
        """Ask the LLM for a route and store a validated decision.

        Args:
            state: Workflow state; reads ``upgraded_query`` and writes
                ``route_decision`` plus routing metadata.

        Returns:
            The same state, with ``route_decision`` set to one of
            "RAG", "WEB", "DIRECT" (falling back to
            ``settings.DEFAULT_ROUTE`` on malformed output or errors).
        """

        chain = self.prompt | self.llm

        try:
            response = chain.invoke({"query": state.upgraded_query})
            route_decision = response.content.strip().upper()

            # Tolerate decorated answers such as "ROUTE: RAG" or "RAG."
            # by scanning for a known keyword before falling back.
            if route_decision not in ("RAG", "WEB", "DIRECT"):
                route_decision = next(
                    (kw for kw in ("RAG", "WEB", "DIRECT") if kw in route_decision),
                    settings.DEFAULT_ROUTE,
                )

            state.route_decision = route_decision
            state.metadata["routing_success"] = True

        except Exception as e:
            # Any LLM/runtime failure routes to the safe default path.
            state.route_decision = settings.DEFAULT_ROUTE
            state.metadata["routing_success"] = False
            state.metadata["routing_error"] = str(e)

        return state
|
| 36 |
+
|
| 37 |
+
# Node function for LangGraph
|
| 38 |
+
def query_router_node(state: AgentState) -> AgentState:
    """LangGraph entry point for the routing step."""
    # Builds a fresh router (and LLM client) for every invocation.
    return QueryRouter().route_query(state)
|
src/agenticRAG/nodes/query_upgrader.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.agenticRAG.models.state import AgentState
|
| 2 |
+
from src.agenticRAG.components.llm_factory import LLMFactory
|
| 3 |
+
from src.agenticRAG.prompt.prompts import Prompts
|
| 4 |
+
from src.config.settings import settings
|
| 5 |
+
|
| 6 |
+
class QueryUpgrader:
    """Node that rewrites the raw user query into a retrieval-friendly form."""

    def __init__(self):
        self.llm = LLMFactory.get_llm()
        self.prompt = Prompts.QUERY_UPGRADER

    def upgrade_query(self, state: AgentState) -> AgentState:
        """Enhance the query, keeping the original on any problem.

        The original query is retained whenever the LLM returns an
        empty string, an over-long rewrite (beyond
        ``settings.MAX_QUERY_LENGTH``), or raises.
        """

        chain = self.prompt | self.llm

        try:
            candidate = chain.invoke({"query": state.user_query}).content.strip()

            acceptable = bool(candidate) and len(candidate) <= settings.MAX_QUERY_LENGTH
            state.upgraded_query = candidate if acceptable else state.user_query
            state.metadata["upgrade_success"] = True

        except Exception as e:
            state.upgraded_query = state.user_query
            state.metadata["upgrade_success"] = False
            state.metadata["upgrade_error"] = str(e)

        return state
|
| 35 |
+
|
| 36 |
+
# Node function for LangGraph
|
| 37 |
+
def query_upgrader_node(state: AgentState) -> AgentState:
    """LangGraph entry point for the query-upgrade step."""
    # Builds a fresh upgrader (and LLM client) for every invocation.
    return QueryUpgrader().upgrade_query(state)
|
src/agenticRAG/nodes/rag_node.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.agenticRAG.models.state import AgentState
|
| 2 |
+
from src.agenticRAG.components.llm_factory import LLMFactory
|
| 3 |
+
from src.agenticRAG.components.vectorstore import VectorStoreManager
|
| 4 |
+
from src.agenticRAG.prompt.prompts import Prompts
|
| 5 |
+
|
| 6 |
+
class RAGNode:
    """Node that answers from the local vector store (RAG path)."""

    def __init__(self):
        self.llm = LLMFactory.get_llm()
        self.vectorstore_manager = VectorStoreManager()
        self.prompt = Prompts.RAG_RESPONSE

        # The store must be loaded before any retrieval can happen.
        self.vectorstore_manager.load_vectorstore()

    def process_rag(self, state: AgentState) -> AgentState:
        """Retrieve supporting documents and answer from them.

        Errors degrade to a canned apology plus metadata describing the
        failure, so the graph run itself never aborts on this node.
        """
        try:
            # Top-3 nearest documents for the upgraded query.
            docs = self.vectorstore_manager.search_documents(state.upgraded_query, k=3)
            state.retrieved_docs = docs

            context = "\n".join(docs) if docs else "No relevant documents found."
            answer = (self.prompt | self.llm).invoke({
                "query": state.upgraded_query,
                "context": context
            })

            state.final_response = answer.content
            state.metadata["rag_success"] = True

        except Exception as e:
            state.final_response = "Sorry, I couldn't retrieve information from the knowledge base."
            state.metadata["rag_success"] = False
            state.metadata["rag_error"] = str(e)

        return state
|
| 43 |
+
|
| 44 |
+
# Node function for LangGraph
|
| 45 |
+
def rag_node(state: AgentState) -> AgentState:
    """LangGraph entry point for the RAG path."""
    # Builds a fresh node (reloading the vector store) per invocation.
    return RAGNode().process_rag(state)
|
src/agenticRAG/nodes/web_search_node.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.agenticRAG.models.state import AgentState
|
| 2 |
+
from src.agenticRAG.components.llm_factory import LLMFactory
|
| 3 |
+
from src.agenticRAG.components.search_tools import SearchToolFactory
|
| 4 |
+
from src.agenticRAG.prompt.prompts import Prompts
|
| 5 |
+
|
| 6 |
+
class WebSearchNode:
    """Node that answers from live web search results."""

    def __init__(self):
        self.llm = LLMFactory.get_llm()
        self.search_tool = SearchToolFactory.get_search_tool()
        self.prompt = Prompts.WEB_RESPONSE

    def process_web_search(self, state: AgentState) -> AgentState:
        """Search the web and synthesize an answer from the results.

        Errors degrade to a canned apology plus metadata describing the
        failure, so the graph run itself never aborts on this node.
        """
        try:
            raw_results = self.search_tool.run(state.upgraded_query)
            # The tool returns a single blob; store it as a one-item list
            # to match the state schema.
            state.search_results = [raw_results]

            answer = (self.prompt | self.llm).invoke({
                "query": state.upgraded_query,
                "search_results": raw_results
            })

            state.final_response = answer.content
            state.metadata["web_search_success"] = True

        except Exception as e:
            state.final_response = "Sorry, I couldn't perform web search at the moment."
            state.metadata["web_search_success"] = False
            state.metadata["web_search_error"] = str(e)

        return state
|
| 39 |
+
|
| 40 |
+
# Node function for LangGraph
|
| 41 |
+
def web_search_node(state: AgentState) -> AgentState:
    """LangGraph entry point for the web-search path."""
    # Builds a fresh node (LLM client and search tool) per invocation.
    return WebSearchNode().process_web_search(state)
|
src/agenticRAG/prompt/__init__.py
ADDED
|
File without changes
|
src/agenticRAG/prompt/prompts.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# from langchain_core.prompts import ChatPromptTemplate
|
| 2 |
+
# import json
|
| 3 |
+
|
| 4 |
+
# class Prompts:
|
| 5 |
+
# """Centralized prompt templates"""
|
| 6 |
+
|
| 7 |
+
# QUERY_UPGRADER = ChatPromptTemplate.from_messages([
|
| 8 |
+
# ("system", """You are a query enhancement specialist. Your task is to improve user queries for better information retrieval.
|
| 9 |
+
|
| 10 |
+
# Enhancement guidelines:
|
| 11 |
+
# 1. Add relevant keywords and synonyms
|
| 12 |
+
# 2. Clarify ambiguous terms
|
| 13 |
+
# 3. Expand abbreviations and acronyms
|
| 14 |
+
# 4. Add context when missing
|
| 15 |
+
# 5. Maintain original intent
|
| 16 |
+
# 6. Keep enhanced query concise (under 200 characters)
|
| 17 |
+
|
| 18 |
+
# Return only the enhanced query, nothing else."""),
|
| 19 |
+
# ("human", "Original query: {query}")
|
| 20 |
+
# ])
|
| 21 |
+
|
| 22 |
+
# QUERY_ROUTER = ChatPromptTemplate.from_messages([
|
| 23 |
+
# ("system", """You are a query router. Analyze the query and decide which path to take:
|
| 24 |
+
|
| 25 |
+
# PATHS:
|
| 26 |
+
# 1. "RAG" - For queries about specific knowledge base content, documents, or domain expertise
|
| 27 |
+
# 2. "WEB" - For current events, real-time information, recent news, or trending topics
|
| 28 |
+
# 3. "DIRECT" - For general conversation, creative tasks, opinions, or reasoning without specific facts
|
| 29 |
+
|
| 30 |
+
# DECISION CRITERIA:
|
| 31 |
+
# - RAG: Domain-specific questions, technical documentation, specific facts from knowledge base
|
| 32 |
+
# - WEB: Questions with temporal keywords (latest, current, recent, today), current events, real-time data
|
| 33 |
+
# - DIRECT: General chat, creative writing, opinions, mathematical reasoning, casual conversation
|
| 34 |
+
|
| 35 |
+
# Respond with only one word: RAG, WEB, or DIRECT"""),
|
| 36 |
+
# ("human", "Query: {query}")
|
| 37 |
+
# ])
|
| 38 |
+
|
| 39 |
+
# RAG_RESPONSE = ChatPromptTemplate.from_messages([
|
| 40 |
+
# ("system", """You are a helpful assistant. Answer the user's question based on the provided context from the knowledge base.
|
| 41 |
+
|
| 42 |
+
# Context: {context}
|
| 43 |
+
|
| 44 |
+
# If the context doesn't contain relevant information, say so clearly."""),
|
| 45 |
+
# ("human", "{query}")
|
| 46 |
+
# ])
|
| 47 |
+
|
| 48 |
+
# WEB_RESPONSE = ChatPromptTemplate.from_messages([
|
| 49 |
+
# ("system", """You are a helpful assistant. Answer the user's question based on the provided web search results.
|
| 50 |
+
|
| 51 |
+
# Search Results: {search_results}
|
| 52 |
+
|
| 53 |
+
# Provide a comprehensive answer based on the search results. If the results don't contain relevant information, say so clearly."""),
|
| 54 |
+
# ("human", "{query}")
|
| 55 |
+
# ])
|
| 56 |
+
|
| 57 |
+
# DIRECT_RESPONSE = ChatPromptTemplate.from_messages([
|
| 58 |
+
# ("system", """You are a helpful AI assistant. Answer the user's question directly using your knowledge and reasoning capabilities.
|
| 59 |
+
|
| 60 |
+
# Be conversational, accurate, and helpful. If you're unsure about something, acknowledge the uncertainty."""),
|
| 61 |
+
# ("human", "{query}")
|
| 62 |
+
# ])
|
| 63 |
+
|
| 64 |
+
# def load_data_relative():
|
| 65 |
+
# """Load data.json using relative path"""
|
| 66 |
+
# try:
|
| 67 |
+
# with open("knowledge_base_metadata.json", 'r', encoding='utf-8') as f:
|
| 68 |
+
# data = json.load(f)
|
| 69 |
+
# description = ""
|
| 70 |
+
# for key in data:
|
| 71 |
+
# description +=f"{key['description']}\n"
|
| 72 |
+
# return description
|
| 73 |
+
# except FileNotFoundError:
|
| 74 |
+
# print("data.json not found in current directory")
|
| 75 |
+
# return None
|
| 76 |
+
# except json.JSONDecodeError as e:
|
| 77 |
+
# print(f"Error decoding JSON: {e}")
|
| 78 |
+
# return None
|
| 79 |
+
|
| 80 |
+
# if __name__=="__main__":
|
| 81 |
+
# print(load_data_relative())
|
| 82 |
+
|
| 83 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 84 |
+
import json
|
| 85 |
+
|
| 86 |
+
class Prompts:
    """Centralized prompt templates.

    Static templates are class attributes; the router prompt can also be
    built dynamically via :meth:`query_router`, which injects a live
    description of the knowledge base.

    FIXES: restores the full QUERY_UPGRADER / WEB_RESPONSE /
    DIRECT_RESPONSE texts (previously truncated with "..."), and
    reinstates the ``QUERY_ROUTER`` class attribute that other modules
    reference (e.g. ``QueryRouter`` uses ``Prompts.QUERY_ROUTER``).
    """

    @staticmethod
    def load_kb_description():
        """Load knowledge-base descriptions from metadata, or a fallback note."""
        try:
            with open("knowledge_base_metadata.json", 'r', encoding='utf-8') as f:
                data = json.load(f)
            description = ""
            for item in data:
                description += f"- {item.get('description', '').strip()}\n"
            return description.strip()
        except FileNotFoundError:
            return "No knowledge base found."
        except json.JSONDecodeError as e:
            return f"Error decoding knowledge base: {e}"

    @classmethod
    def query_router(cls):
        """Return the router prompt with the current KB description injected."""
        kb_description = cls.load_kb_description()
        return ChatPromptTemplate.from_messages([
            ("system", f"""You are a query router. Analyze the query and decide which path to take:

PATHS:
1. "RAG" - For queries about specific knowledge base content, documents, or domain expertise
2. "WEB" - For current events, real-time information, recent news, or trending topics
3. "DIRECT" - For general conversation, creative tasks, opinions, or reasoning without specific facts

DECISION CRITERIA:
- RAG: Domain-specific questions, technical documentation, specific facts from knowledge base
- WEB: Questions with temporal keywords (latest, current, recent, today), current events, real-time data
- DIRECT: General chat, creative writing, opinions, mathematical reasoning, casual conversation

Knowledge Base contains:
{kb_description}

Respond with only one word: RAG, WEB, or DIRECT"""),
            ("human", "Query: {{query}}")
        ])

    # Static router prompt kept for callers that reference the attribute
    # directly; it omits the dynamic KB description that query_router()
    # injects but is otherwise equivalent.
    QUERY_ROUTER = ChatPromptTemplate.from_messages([
        ("system", """You are a query router. Analyze the query and decide which path to take:

PATHS:
1. "RAG" - For queries about specific knowledge base content, documents, or domain expertise
2. "WEB" - For current events, real-time information, recent news, or trending topics
3. "DIRECT" - For general conversation, creative tasks, opinions, or reasoning without specific facts

DECISION CRITERIA:
- RAG: Domain-specific questions, technical documentation, specific facts from knowledge base
- WEB: Questions with temporal keywords (latest, current, recent, today), current events, real-time data
- DIRECT: General chat, creative writing, opinions, mathematical reasoning, casual conversation

Respond with only one word: RAG, WEB, or DIRECT"""),
        ("human", "Query: {query}")
    ])

    QUERY_UPGRADER = ChatPromptTemplate.from_messages([
        ("system", """You are a query enhancement specialist. Your task is to improve user queries for better information retrieval.

Enhancement guidelines:
1. Add relevant keywords and synonyms
2. Clarify ambiguous terms
3. Expand abbreviations and acronyms
4. Add context when missing
5. Maintain original intent
6. Keep enhanced query concise (under 200 characters)

Return only the enhanced query, nothing else."""),
        ("human", "Original query: {query}")
    ])

    RAG_RESPONSE = ChatPromptTemplate.from_messages([
        ("system", """You are a helpful assistant. Answer the user's question based on the provided context from the knowledge base.

Context: {context}

If the context doesn't contain relevant information, say so clearly."""),
        ("human", "{query}")
    ])

    WEB_RESPONSE = ChatPromptTemplate.from_messages([
        ("system", """You are a helpful assistant. Answer the user's question based on the provided web search results.

Search Results: {search_results}

Provide a comprehensive answer based on the search results. If the results don't contain relevant information, say so clearly."""),
        ("human", "{query}")
    ])

    DIRECT_RESPONSE = ChatPromptTemplate.from_messages([
        ("system", """You are a helpful AI assistant. Answer the user's question directly using your knowledge and reasoning capabilities.

Be conversational, accurate, and helpful. If you're unsure about something, acknowledge the uncertainty."""),
        ("human", "{query}")
    ])
| 156 |
+
|
| 157 |
+
if __name__ == "__main__":
    # Smoke check: render the dynamic router prompt for a sample query.
    rendered = Prompts.query_router().format(
        query="What is the architecture of the Omani AI system?"
    )
    print(rendered)
|
src/config/__init__.py
ADDED
|
File without changes
|
src/config/settings.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Dict, Any
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
load_dotenv()
|
| 5 |
+
|
| 6 |
+
class Settings:
    """Configuration settings for the AgenticRAG system.

    Values are read once from the environment at import time; a ``.env``
    file is loaded by the module-level ``load_dotenv()`` call above.
    """

    # API Keys
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
    GOOGLE_CSE_ID: str = os.getenv("GOOGLE_CSE_ID", "")

    # Model Configuration
    GROQ_MODEL: str = "llama3-8b-8192"
    GROQ_TEMPERATURE: float = 0.1

    OPENAI_MODEL: str = "gpt-4.1-nano-2025-04-14"
    OPENAI_TEMPERATURE: float = 0.3
    OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")

    # Embedding Models
    EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
    OPENAI_EMBEDDING_MODEL: str = "text-embedding-3-large"
    # Vector Store
    VECTORSTORE_PATH: str = "data/vectorstore"

    # Search Configuration
    SEARCH_RESULTS_COUNT: int = 5

    SERPER_API_KEY: str = os.getenv("SERPER_API_KEY", "")
    TAVILY_API_KEY: str = os.getenv("TAVILY_API_KEY", "")

    # Query Enhancement
    MAX_QUERY_LENGTH: int = 200

    # Routing Configuration
    DEFAULT_ROUTE: str = "DIRECT"

    @classmethod
    def validate(cls) -> bool:
        """Validate required settings.

        Returns:
            True when every required key is non-empty.

        Raises:
            ValueError: if a required setting is missing or empty.
        """
        required_keys = ["GROQ_API_KEY"]
        for key in required_keys:
            if not getattr(cls, key):
                raise ValueError(f"Missing required setting: {key}")
        return True

# Shared singleton imported across the package.
settings = Settings()
|