Upload 6 files
Browse files- app.py +118 -0
- chat_history.txt +0 -0
- embedding__model.txt +15 -0
- gemini_wrapper.py +135 -0
- rag.py +322 -0
- requirements.txt +10 -0
app.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
from rag import RAGSystem
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from gtts import gTTS
|
| 6 |
+
from docx2pdf import convert
|
| 7 |
+
|
| 8 |
+
# Load environment variables from .env file (expects GEMINI_API_KEY).
load_dotenv()

# Initialize the RAG system once at import time with default locations.
pdf_dir = "material"      # directory scanned for PDF documents
db_dir = "chroma_db"      # persistent ChromaDB storage location
gemini_api_key = os.getenv("GEMINI_API_KEY")
rag_system = RAGSystem(pdf_dir=pdf_dir, gemini_api_key=gemini_api_key, db_directory=db_dir)
| 17 |
+
# Function to handle file upload and process the uploaded file
def upload_and_process(file):
    """Index an uploaded PDF or DOCX document for retrieval.

    Args:
        file: Gradio file object (exposes the temp path as ``.name``) or None.

    Returns:
        A human-readable status message.
    """
    if file is None:
        return "No file uploaded."
    uploaded_file_path = file.name
    root, ext = os.path.splitext(uploaded_file_path)
    if ext.lower() == ".docx":
        # Build the target path from splitext rather than str.replace:
        # replace(".docx", ".pdf") misses uppercase extensions (which the
        # lowercased check above accepts) and could mangle a path that
        # contains ".docx" elsewhere.
        pdf_path = root + ".pdf"
        convert(uploaded_file_path, pdf_path)
        rag_system.pdf_dir = os.path.dirname(pdf_path)
    else:
        rag_system.pdf_dir = os.path.dirname(uploaded_file_path)
    rag_system.process_documents()
    return "File uploaded and processed successfully."
| 32 |
+
|
| 33 |
+
# Updated function to handle user queries
|
| 34 |
+
def ask_query(query):
|
| 35 |
+
if query.strip():
|
| 36 |
+
response = rag_system.generate_response(query)
|
| 37 |
+
# Append the query and response to the conversation history
|
| 38 |
+
rag_system.conversation_history.append({"user": query, "system": response})
|
| 39 |
+
audio_path = text_to_speech(response)
|
| 40 |
+
return response, audio_path
|
| 41 |
+
return "Please enter a valid query.", None
|
| 42 |
+
|
| 43 |
+
# Function to convert text to speech and return audio file path
|
| 44 |
+
def text_to_speech(response):
|
| 45 |
+
tts = gTTS(response)
|
| 46 |
+
audio_path = "response_audio.mp3"
|
| 47 |
+
tts.save(audio_path)
|
| 48 |
+
return audio_path
|
| 49 |
+
|
| 50 |
+
# Function to clear the chat history
|
| 51 |
+
def clear_chat():
|
| 52 |
+
rag_system.conversation_history = []
|
| 53 |
+
return "Chat history cleared."
|
| 54 |
+
|
| 55 |
+
# Updated function to download the chat history
|
| 56 |
+
def download_chat():
|
| 57 |
+
chat_history = "\n".join([f"User: {entry['user']}\nSystem: {entry['system']}" for entry in rag_system.conversation_history])
|
| 58 |
+
file_path = "chat_history.txt"
|
| 59 |
+
with open(file_path, "w") as file:
|
| 60 |
+
file.write(chat_history)
|
| 61 |
+
return file_path
|
| 62 |
+
|
| 63 |
+
# --- Custom CSS for modern look ---
|
| 64 |
+
custom_css = '''
|
| 65 |
+
body { font-family: 'Roboto', 'Open Sans', Arial, sans-serif; }
|
| 66 |
+
.gradio-container { background: linear-gradient(135deg, #f8fafc 0%, #e0f7fa 100%); }
|
| 67 |
+
#rag-title { font-size: 2.2rem; font-weight: 700; color: #1e293b; letter-spacing: 1px; display: flex; align-items: center; gap: 0.5em; }
|
| 68 |
+
#rag-title img { height: 2.2rem; vertical-align: middle; }
|
| 69 |
+
.gr-box { border-radius: 12px !important; box-shadow: 0 2px 12px 0 rgba(16, 42, 67, 0.06); border: 1px solid #e0e7ef; }
|
| 70 |
+
.gr-button { background: #14b8a6; color: #fff; border-radius: 8px; font-weight: 600; font-size: 1rem; padding: 0.7em 1.5em; transition: background 0.2s, transform 0.2s; }
|
| 71 |
+
.gr-button:hover, .gr-button:focus { background: #0d9488; transform: scale(1.04); }
|
| 72 |
+
.gr-text-input, .gr-textbox { border-radius: 8px; border: 1.5px solid #cbd5e1; background: #fff; font-size: 1.05rem; }
|
| 73 |
+
.gr-text-input:focus, .gr-textbox:focus { border-color: #14b8a6; box-shadow: 0 0 0 2px #99f6e4; }
|
| 74 |
+
.gr-audio { border-radius: 8px; background: #f1f5f9; }
|
| 75 |
+
.gr-file { border-radius: 8px; border: 1.5px dashed #14b8a6; background: #f0fdfa; }
|
| 76 |
+
.gr-markdown { color: #334155; }
|
| 77 |
+
.fade-in { animation: fadeIn 0.7s; }
|
| 78 |
+
@keyframes fadeIn { from { opacity: 0; } to { opacity: 1; } }
|
| 79 |
+
@media (max-width: 700px) {
|
| 80 |
+
#rag-title { font-size: 1.3rem; }
|
| 81 |
+
.gradio-container { padding: 0.5em; }
|
| 82 |
+
}
|
| 83 |
+
'''
|
| 84 |
+
|
| 85 |
+
# --- Gradio UI with modern design ---
|
| 86 |
+
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as ui:
|
| 87 |
+
# Branding row with logo and title
|
| 88 |
+
with gr.Row():
|
| 89 |
+
gr.Markdown("""
|
| 90 |
+
<div id='rag-title'>
|
| 91 |
+
<img src='https://img.icons8.com/color/48/000000/artificial-intelligence.png' alt='RAG Logo' />
|
| 92 |
+
RAG System UI
|
| 93 |
+
</div>
|
| 94 |
+
""", elem_id="rag-title")
|
| 95 |
+
|
| 96 |
+
with gr.Row(equal_height=True):
|
| 97 |
+
with gr.Column(scale=1, min_width=320):
|
| 98 |
+
file_input = gr.File(label="Upload PDF or DOCX", file_types=[".pdf", ".docx"], elem_classes=["gr-file"])
|
| 99 |
+
upload_button = gr.Button("Upload & Process File", elem_classes=["gr-button"])
|
| 100 |
+
with gr.Column(scale=2, min_width=400):
|
| 101 |
+
query_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...", lines=1, elem_classes=["gr-text-input"])
|
| 102 |
+
response_output = gr.Textbox(label="RAG Response", lines=6, interactive=False, elem_classes=["gr-textbox", "fade-in"])
|
| 103 |
+
play_button = gr.Audio(label="Play Response", interactive=False, elem_classes=["gr-audio"])
|
| 104 |
+
|
| 105 |
+
with gr.Row():
|
| 106 |
+
clear_button = gr.Button("Clear Chat History", elem_classes=["gr-button"])
|
| 107 |
+
download_button = gr.Button("Download Chat History", elem_classes=["gr-button"])
|
| 108 |
+
download_file = gr.File(label="Download Chat File", elem_classes=["gr-file"])
|
| 109 |
+
|
| 110 |
+
# Bind functions to UI components
|
| 111 |
+
upload_button.click(upload_and_process, inputs=file_input, outputs=None)
|
| 112 |
+
query_input.submit(ask_query, inputs=query_input, outputs=[response_output, play_button])
|
| 113 |
+
clear_button.click(clear_chat, inputs=None, outputs=None)
|
| 114 |
+
download_button.click(download_chat, inputs=None, outputs=download_file)
|
| 115 |
+
|
| 116 |
+
# Launch the Gradio app
|
| 117 |
+
if __name__ == "__main__":
|
| 118 |
+
ui.launch()
|
chat_history.txt
ADDED
|
File without changes
|
embedding__model.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer
|
| 2 |
+
|
| 3 |
+
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 4 |
+
|
| 5 |
+
sentences = [
|
| 6 |
+
"That is a happy person",
|
| 7 |
+
"That is a happy dog",
|
| 8 |
+
"That is a very happy person",
|
| 9 |
+
"Today is a sunny day"
|
| 10 |
+
]
|
| 11 |
+
embeddings = model.encode(sentences)
|
| 12 |
+
|
| 13 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 14 |
+
print(similarities.shape)
|
| 15 |
+
# [4, 4]
|
gemini_wrapper.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import google.generativeai as genai
|
| 2 |
+
|
| 3 |
+
class GoogleGeminiWrapper:
    """Thin convenience wrapper around the google-generativeai SDK.

    Supports single-turn prompts (ask), multi-turn chat with SDK-managed
    history (chat), and model discovery (list_available_models).
    """

    def __init__(self, api_key: str):
        """
        Initialize the GoogleGeminiWrapper with the API key.

        :param api_key: Your Google Gemini API key.
        """
        self.api_key = api_key
        genai.configure(api_key=self.api_key)
        self.conversation_history = []  # For the new chat method
        self.chat_session = None  # To store the chat session for gemini

    def ask(self, prompt: str, model: str = "gemini-2.0-flash", max_tokens: int = 150, temperature: float = 0.7) -> str:
        """
        Send a prompt to the Google Gemini model and get a response (single turn).

        :param prompt: The input prompt to send to the model.
        :param model: The model to use (default is "gemini-2.0-flash").
        :param max_tokens: The maximum number of tokens to include in the response.
        :param temperature: Sampling temperature (higher values mean more randomness).
        :return: The response from the model as a string.
        """
        try:
            generation_config = {
                "temperature": temperature,
                "max_output_tokens": max_tokens,
            }
            model_instance = genai.GenerativeModel(model_name=model, generation_config=generation_config)
            response = model_instance.generate_content(prompt)
            return response.text.strip()
        # NOTE(review): broad except returns the error as the "answer",
        # so callers cannot distinguish a model reply from a failure.
        except Exception as e:
            return f"An error occurred: {e}"

    def start_chat_session(self, model: str = "gemini-2.0-flash", temperature: float = 0.7, max_tokens: int = 150):
        """
        Starts a new chat session or continues an existing one.
        """
        generation_config = {
            "temperature": temperature,
            "max_output_tokens": max_tokens,
        }
        model_instance = genai.GenerativeModel(model_name=model, generation_config=generation_config)
        # For Gemini, conversation history is managed by the chat object itself.
        # We re-initialize the chat session if one doesn't exist or if we want to start fresh.
        # If you want to persist history across calls to `chat` without explicitly calling reset,
        # you might initialize `self.chat_session` in `__init__` or when `chat` is first called.
        self.chat_session = model_instance.start_chat(history=self.conversation_history)

    def chat(self, prompt: str, model: str = "gemini-2.0-flash", max_tokens: int = 150, temperature: float = 0.7) -> str:
        """
        Send a prompt to the Google Gemini model, maintaining conversation history for context.

        :param prompt: The input prompt to send to the model.
        :param model: The model to use (default is "gemini-2.0-flash").
        :param max_tokens: The maximum number of tokens to include in the response.
        :param temperature: Sampling temperature (higher values mean more randomness).
        :return: The response from the model as a string.
        """
        try:
            # Lazily create the session so the first chat() call "just works".
            if self.chat_session is None:
                self.start_chat_session(model=model, temperature=temperature, max_tokens=max_tokens)

            response = self.chat_session.send_message(prompt)
            assistant_response = response.text.strip()

            # Gemini's chat session object updates its history internally.
            # We can optionally also store it in our self.conversation_history if needed for other purposes
            # or if we want to be able to reconstruct the chat session later.
            # For simplicity here, we rely on the chat_session's internal history.
            # To manually track:
            # self.conversation_history.append({"role": "user", "parts": [prompt]})
            # self.conversation_history.append({"role": "model", "parts": [assistant_response]})

            return assistant_response
        except Exception as e:
            # Reset chat session on error to avoid issues with subsequent calls
            self.chat_session = None
            return f"An error occurred: {e}"

    def reset_conversation(self):
        """
        Reset the conversation history and the chat session.
        """
        self.conversation_history = []
        self.chat_session = None  # Crucial for Gemini to start a fresh chat

    def list_available_models(self):
        """
        Lists available Gemini models.
        :return: A list of available models, or an error string on failure.
        """
        try:
            print("Available Gemini Models:")
            for m in genai.list_models():
                if 'generateContent' in m.supported_generation_methods:
                    print(m.name)
            # NOTE(review): list_models() is called twice (once to print,
            # once to build the result) — two API round-trips.
            return [m.name for m in genai.list_models() if 'generateContent' in m.supported_generation_methods]
        except Exception as e:
            return f"An error occurred while listing models: {e}"
| 103 |
+
|
| 104 |
+
# Example usage (uncomment to test):
if __name__ == "__main__":
    import os

    # SECURITY: never hard-code API keys in source control. The key that
    # was previously committed here must be considered compromised and
    # revoked in the Google AI console.
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise SystemExit(
            "GEMINI_API_KEY environment variable is not set. "
            "Export it before running this example."
        )
    wrapper = GoogleGeminiWrapper(api_key)

    # Example 0: List available models
    # print("\nListing available models...")
    # available_models = wrapper.list_available_models()
    # The function already prints, but you can use the returned list if needed
    # print(available_models)

    # Example 1: Simple one-off question
    response_ask = wrapper.ask("What is the largest planet in our solar system?")
    print(f"Ask response: {response_ask}")

    # Example 2: Conversation with history
    # print("\nStarting chat conversation...")
    # response1 = wrapper.chat("Hi, my name is Alex.")
    # print(f"Chat response 1: {response1}")
    # response2 = wrapper.chat("What is my name?")
    # print(f"Chat response 2: {response2}")  # Should remember "Alex"

    # Reset conversation history
    # wrapper.reset_conversation()
    # print("\nConversation reset.")
    # response4 = wrapper.chat("Do you remember my name?")
    # print(f"Chat response 4 (after reset): {response4}")  # Should not remember "Alex"
rag.py
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
RAG (Retrieval Augmented Generation) System
|
| 4 |
+
-------------------------------------------
|
| 5 |
+
This module implements a RAG system that processes PDF documents,
|
| 6 |
+
uses ChromaDB as a vector database, sentence-transformers for embeddings,
|
| 7 |
+
and Google's Gemini as the main LLM. The system follows a conversational pattern.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import logging
|
| 12 |
+
from typing import List, Dict, Any, Optional
|
| 13 |
+
|
| 14 |
+
# Document processing
|
| 15 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 16 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 17 |
+
|
| 18 |
+
# Embeddings
|
| 19 |
+
from sentence_transformers import SentenceTransformer
|
| 20 |
+
|
| 21 |
+
# Vector database
|
| 22 |
+
import chromadb
|
| 23 |
+
from chromadb.utils import embedding_functions
|
| 24 |
+
|
| 25 |
+
# For Gemini LLM integration
|
| 26 |
+
from gemini_wrapper import GoogleGeminiWrapper
|
| 27 |
+
|
| 28 |
+
from gtts import gTTS
|
| 29 |
+
|
| 30 |
+
# Configure logging
# Module-wide logger: INFO level with timestamps so document-processing
# progress is visible when running as a script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
| 33 |
+
|
| 34 |
+
class RAGSystem:
    """
    A Retrieval Augmented Generation system that processes PDF documents,
    stores their embeddings in a vector database, and generates responses
    using the Google Gemini model.
    """

    def __init__(
        self,
        pdf_dir: str,
        gemini_api_key: str,
        embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        db_directory: str = "./chroma_db"
    ):
        """
        Initialize the RAG system.

        Args:
            pdf_dir: Directory containing PDF documents
            gemini_api_key: API key for Google Gemini
            embedding_model_name: Name of the sentence-transformers model
            chunk_size: Size of text chunks for splitting documents
            chunk_overlap: Overlap between consecutive chunks
            db_directory: Directory to store the ChromaDB database
        """
        self.pdf_dir = pdf_dir
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.db_directory = db_directory

        # Initialize the embedding model
        # NOTE: the model is loaded here AND separately inside the Chroma
        # embedding function below, so it is effectively loaded twice.
        logger.info(f"Loading embedding model: {embedding_model_name}")
        self.embedding_model = SentenceTransformer(embedding_model_name)

        # Initialize the text splitter
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )

        # Initialize ChromaDB (persistent on disk, survives restarts)
        logger.info(f"Initializing ChromaDB at {db_directory}")
        self.client = chromadb.PersistentClient(path=db_directory)

        # Create a custom embedding function that uses sentence-transformers
        self.sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name=embedding_model_name
        )

        # Create or get the collection
        self.collection = self.client.get_or_create_collection(
            name="pdf_documents",
            embedding_function=self.sentence_transformer_ef
        )

        # Initialize the Gemini LLM
        logger.info("Initializing Google Gemini")
        self.llm = GoogleGeminiWrapper(api_key=gemini_api_key)

        # Load conversation history (list of {"user": ..., "system": ...})
        self.conversation_history = []

    def process_documents(self) -> None:
        """
        Process all PDF documents in the specified directory,
        split them into chunks, generate embeddings, and store in ChromaDB.
        """
        logger.info(f"Processing documents from: {self.pdf_dir}")

        # Check if documents are already processed
        # NOTE(review): this early return means that once the collection is
        # non-empty, newly uploaded files (see app.py upload flow) are never
        # indexed. A real incremental-update path would be needed; it also
        # can't simply be removed because chunk ids restart at chunk_0 and
        # would collide with existing entries.
        if self.collection.count() > 0:
            logger.info(f"Found {self.collection.count()} existing document chunks in the database")
            return

        # Process each PDF file in the directory
        # NOTE(review): endswith('.pdf') is case-sensitive; ".PDF" files
        # are silently skipped.
        pdf_files = [f for f in os.listdir(self.pdf_dir) if f.endswith('.pdf')]
        if not pdf_files:
            logger.warning(f"No PDF files found in {self.pdf_dir}")
            return

        logger.info(f"Found {len(pdf_files)} PDF files")

        doc_chunks = []
        metadatas = []
        ids = []
        chunk_idx = 0  # global counter so ids are unique across all files

        for pdf_file in pdf_files:
            pdf_path = os.path.join(self.pdf_dir, pdf_file)
            logger.info(f"Processing: {pdf_path}")

            # Load PDF
            loader = PyPDFLoader(pdf_path)
            documents = loader.load()

            # Split documents into chunks
            chunks = self.text_splitter.split_documents(documents)
            logger.info(f"Split {pdf_file} into {len(chunks)} chunks")

            # Prepare data for ChromaDB
            for chunk in chunks:
                doc_chunks.append(chunk.page_content)
                metadatas.append({
                    "source": pdf_file,
                    "page": chunk.metadata.get("page", 0),
                })
                ids.append(f"chunk_{chunk_idx}")
                chunk_idx += 1

        # Add documents to ChromaDB (embeddings computed by the collection's
        # embedding function at insert time)
        if doc_chunks:
            logger.info(f"Adding {len(doc_chunks)} chunks to ChromaDB")
            self.collection.add(
                documents=doc_chunks,
                metadatas=metadatas,
                ids=ids
            )
            logger.info("Documents successfully processed and stored")
        else:
            logger.warning("No document chunks were generated")

    def retrieve_relevant_chunks(self, query: str, k: int = 3) -> List[Dict[str, Any]]:
        """
        Retrieve the k most relevant document chunks for a given query.

        Args:
            query: The query text
            k: Number of relevant chunks to retrieve

        Returns:
            List of relevant document chunks with their metadata
        """
        logger.info(f"Retrieving {k} relevant chunks for query: {query}")
        results = self.collection.query(
            query_texts=[query],
            n_results=k
        )

        # Chroma returns parallel per-query lists; we issued one query, so
        # index [0] everywhere.
        relevant_chunks = []
        if results and results["documents"] and results["documents"][0]:
            for i, doc in enumerate(results["documents"][0]):
                relevant_chunks.append({
                    "content": doc,
                    "metadata": results["metadatas"][0][i] if results["metadatas"] and results["metadatas"][0] else {},
                    "id": results["ids"][0][i] if results["ids"] and results["ids"][0] else f"unknown_{i}"
                })

        return relevant_chunks

    def generate_response(self, query: str, k: int = 3) -> str:
        """
        Generate a response for a user query using RAG.

        Args:
            query: User query
            k: Number of relevant chunks to retrieve

        Returns:
            Generated response from the LLM
        """
        # Retrieve relevant document chunks
        relevant_chunks = self.retrieve_relevant_chunks(query, k=k)

        if not relevant_chunks:
            logger.warning("No relevant chunks found for the query")
            return "I couldn't find relevant information to answer your question."

        # Format context from retrieved chunks, labelling source/page so the
        # model can cite them.
        context = "\n\n".join([f"Document {i+1} (from {chunk['metadata'].get('source', 'unknown')}, page {chunk['metadata'].get('page', 'unknown')}):\n{chunk['content']}"
                               for i, chunk in enumerate(relevant_chunks)])

        # Create prompt for the LLM
        prompt = f"""
        You are a helpful assistant that answers questions based on the provided context.

        CONTEXT:
        {context}

        QUESTION:
        {query}

        Please provide a comprehensive and accurate answer based only on the information in the provided context.
        If the context doesn't contain enough information to answer the question, please say so.
        """

        # Generate response using Gemini (low temperature for factual answers)
        response = self.llm.ask(prompt, max_tokens=500, temperature=0.3)
        return response

    def chat(self, user_input: str = None) -> Optional[str]:
        """
        Conduct a conversation with the user using the RAG system.

        Args:
            user_input: User's input. If None, starts a new conversation.

        Returns:
            System's response or None to exit
        """
        if user_input is None:
            # Initialize conversation (blocks on stdin)
            print("RAG System Initialized. Type 'exit' or 'quit' to end the conversation.")
            user_input = input("You: ")

        if user_input.lower() in ['exit', 'quit']:
            print("Ending conversation. Goodbye!")
            return None

        # Generate response using RAG
        response = self.generate_response(user_input)

        # Update conversation history
        self.conversation_history.append({"user": user_input, "system": response})

        return response

    def interactive_session(self) -> None:
        """
        Start an interactive chat session with the RAG system.

        NOTE: unlike chat(), this loop does not append to
        conversation_history.
        """
        print("Welcome to the RAG System!")
        print("Type 'exit' or 'quit' to end the conversation.")

        while True:
            user_input = input("\nYou: ")

            if user_input.lower() in ['exit', 'quit']:
                print("Ending conversation. Goodbye!")
                break

            response = self.generate_response(user_input)
            print(f"\nRAG System: {response}")
| 268 |
+
|
| 269 |
+
# Function to convert text to speech
|
| 270 |
+
def text_to_speech(response):
|
| 271 |
+
tts = gTTS(response)
|
| 272 |
+
audio_path = "response_audio.mp3"
|
| 273 |
+
tts.save(audio_path)
|
| 274 |
+
return audio_path
|
| 275 |
+
|
| 276 |
+
def main():
|
| 277 |
+
"""
|
| 278 |
+
Main function to demonstrate the RAG system.
|
| 279 |
+
"""
|
| 280 |
+
# Attempt to get the Gemini API key from environment variable
|
| 281 |
+
gemini_api_key = os.getenv("GEMINI_API_KEY")
|
| 282 |
+
|
| 283 |
+
if not gemini_api_key:
|
| 284 |
+
# If environment variable is not set or is empty, fallback to the hardcoded key
|
| 285 |
+
hardcoded_api_key = "AIzaSyBisxoehBz8UF0i9kX42f1V3jp-9RNq04g" # Your hardcoded key
|
| 286 |
+
# Check if the environment variable was truly not set (vs. set to an empty string)
|
| 287 |
+
# to decide if we should print the INFO message.
|
| 288 |
+
if os.getenv("GEMINI_API_KEY") is None: # More specific check for unset env variable
|
| 289 |
+
print("INFO: GEMINI_API_KEY environment variable not found. Using hardcoded API key from rag.py.")
|
| 290 |
+
gemini_api_key = hardcoded_api_key
|
| 291 |
+
|
| 292 |
+
# Final check: if the key is still not set (e.g. if hardcoded key was also empty or None)
|
| 293 |
+
if not gemini_api_key:
|
| 294 |
+
print("Error: Gemini API key is not set.")
|
| 295 |
+
print("Please set the GEMINI_API_KEY environment variable, or ensure it's correctly hardcoded in rag.py.")
|
| 296 |
+
print("To set as environment variable:")
|
| 297 |
+
print(" export GEMINI_API_KEY='your_api_key' # For Linux/macOS")
|
| 298 |
+
print(" set GEMINI_API_KEY=your_api_key # For Windows CMD")
|
| 299 |
+
print(" $env:GEMINI_API_KEY='your_api_key' # For Windows PowerShell")
|
| 300 |
+
return
|
| 301 |
+
|
| 302 |
+
# Set paths
|
| 303 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 304 |
+
pdf_dir = os.path.join(current_dir, "material")
|
| 305 |
+
db_dir = os.path.join(current_dir, "chroma_db")
|
| 306 |
+
|
| 307 |
+
# Initialize the RAG system
|
| 308 |
+
rag = RAGSystem(
|
| 309 |
+
pdf_dir=pdf_dir,
|
| 310 |
+
gemini_api_key=gemini_api_key,
|
| 311 |
+
db_directory=db_dir
|
| 312 |
+
)
|
| 313 |
+
|
| 314 |
+
# Process documents
|
| 315 |
+
rag.process_documents()
|
| 316 |
+
|
| 317 |
+
# Start interactive session
|
| 318 |
+
rag.interactive_session()
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
if __name__ == "__main__":
|
| 322 |
+
main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
python-dotenv
|
| 3 |
+
gtts
|
| 4 |
+
docx2pdf
|
| 5 |
+
langchain
|
| 6 |
+
langchain-community
|
| 7 |
+
sentence-transformers
|
| 8 |
+
chromadb
|
| 9 |
+
pypdf
|
| 10 |
+
google-generativeai
|