Spaces:

mominah
/

Chatbot-backend

Sleeping

App Files Files Community

mominah committed on Mar 2, 2025

Commit

7b7cab6

verified ·

1 Parent(s): 8360eea

Upload 11 files

Browse files

Files changed (11) hide show

Dockerfile +25 -0
chat_management.py +94 -0
document_loaders.py +193 -0
embedding.py +11 -0
llm_initialization.py +17 -0
main.py +355 -0
prompt_templates.py +242 -0
requirements.txt +23 -0
retrieval_chain.py +110 -0
text_splitter.py +30 -0
vector_store.py +234 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+# Base image using Python 3.9
+FROM python:3.9
+# Create an unprivileged account (uid 1000, with a home directory) and switch to it,
+# so the following build steps and the running application do not execute as root
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's ~/.local/bin on PATH (presumably where pip places console scripts
+# such as uvicorn when installing as a non-root user)
+ENV PATH="/home/user/.local/bin:$PATH"
+# Set the working directory
+WORKDIR /app
+# Copy only the requirements file first and install dependencies; keeping this step
+# separate from the application copy lets the install layer be reused from the build
+# cache when only application code changes
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Copy the rest of the application, owned by the unprivileged user
+COPY --chown=user . /app
+# Expose port 7860 for the application (matches --port in CMD below)
+EXPOSE 7860
+# Command to run the FastAPI app (`app` in main.py) using uvicorn on all interfaces
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

chat_management.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import uuid
+from pymongo import MongoClient
+from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
+class ChatManagement:
+    def __init__(self, cluster_url, database_name, collection_name):
+        """
+        Initializes the ChatManagement class with MongoDB connection details.
+        Args:
+            cluster_url (str): MongoDB cluster URL.
+            database_name (str): Name of the database.
+            collection_name (str): Name of the collection.
+        """
+        self.connection_string = cluster_url
+        self.database_name = database_name
+        self.collection_name = collection_name
+        self.chat_sessions = {}  # Dictionary to store chat history objects for each session
+    def create_new_chat(self):
+        """
+        Creates a new chat session by initializing a MongoDBChatMessageHistory object.
+        Returns:
+            str: The unique chat ID.
+        """
+        # Generate a unique chat ID
+        chat_id = str(uuid.uuid4())
+        # Initialize MongoDBChatMessageHistory for the chat session
+        chat_message_history = MongoDBChatMessageHistory(
+            session_id=chat_id,
+            connection_string=self.connection_string,
+            database_name=self.database_name,
+            collection_name=self.collection_name,
+        )
+        # Store the chat_message_history object in the session dictionary
+        self.chat_sessions[chat_id] = chat_message_history
+        return chat_id
+    def get_chat_history(self, chat_id):
+        """
+        Retrieves the MongoDBChatMessageHistory object for a given chat session by its chat ID.
+        Args:
+            chat_id (str): The unique ID of the chat session.
+        Returns:
+            MongoDBChatMessageHistory or None: The chat history object of the chat session, or None if not found.
+        """
+        # Check if the chat session is already in memory
+        if chat_id in self.chat_sessions:
+            return self.chat_sessions[chat_id]
+        # If not in memory, try to fetch from the database
+        chat_message_history = MongoDBChatMessageHistory(
+            session_id=chat_id,
+            connection_string=self.connection_string,
+            database_name=self.database_name,
+            collection_name=self.collection_name,
+        )
+        if chat_message_history.messages:  # Check if the session exists in the database
+            self.chat_sessions[chat_id] = chat_message_history
+            return chat_message_history
+        return None  # Chat session not found
+    def initialize_chat_history(self, chat_id):
+        """
+        Initializes a new chat history for the given chat ID if it does not already exist.
+        Args:
+            chat_id (str): The unique ID of the chat session.
+        Returns:
+            MongoDBChatMessageHistory: The initialized chat history object.
+        """
+        # If the chat history already exists, return it
+        if chat_id in self.chat_sessions:
+            return self.chat_sessions[chat_id]
+        # Otherwise, create a new chat history
+        chat_message_history = MongoDBChatMessageHistory(
+            session_id=chat_id,
+            connection_string=self.connection_string,
+            database_name=self.database_name,
+            collection_name=self.collection_name,
+        )
+        # Save the new chat session to the session dictionary
+        self.chat_sessions[chat_id] = chat_message_history
+        return chat_message_history

document_loaders.py ADDED Viewed

	@@ -0,0 +1,193 @@

+from langchain_community.document_loaders  import (CSVLoader, WikipediaLoader, UnstructuredURLLoader,
+                                        YoutubeLoader, PyPDFLoader, BSHTMLLoader,
+                                        Docx2txtLoader, UnstructuredMarkdownLoader)
+from langchain_unstructured import UnstructuredLoader
+class DocumentLoader:
+    def load_unstructured(self, path):
+        """
+        Load data from a file at the specified path:
+        supported files:
+        "csv", "doc", "docx", "epub", "image", "md", "msg", "odt", "org", "pdf", "ppt", "pptx", "rtf", "rst", "tsv", "xlsx"
+        Args:
+            path (str): The file paths
+        Returns:
+            The loaded  data.
+        Exceptions:
+            Prints an error message if the loading fails.
+        """
+        try:
+            loader = UnstructuredLoader(path)
+            data = loader.load()
+            return data
+        except Exception as e:
+            print(f"Error loading Unstructured: {e}")
+    def load_csv(self, path):
+        """
+        Load data from a CSV file at the specified path.
+        Args:
+            path (str): The file path to the CSV file.
+        Returns:
+            The loaded CSV data.
+        Exceptions:
+            Prints an error message if the CSV loading fails.
+        """
+        try:
+            loader = CSVLoader(file_path=path)
+            data = loader.load()
+            return data
+        except Exception as e:
+            print(f"Error loading CSV: {e}")
+    def wikipedia_query(self, search_query):
+        """
+        Query Wikipedia using a given search term and return the results.
+        Args:
+            search_query (str): The search term to query on Wikipedia.
+        Returns:
+            The query results.
+        Exceptions:
+            Prints an error message if the Wikipedia query fails.
+        """
+        try:
+            data = WikipediaLoader(query=search_query, load_max_docs=2).load()
+            return data
+        except Exception as e:
+            print(f"Error querying Wikipedia: {e}")
+    def load_urls(self, urls):
+        """
+        Load and parse content from a list of URLs.
+        Args:
+            urls (list): A list of URLs to load.
+        Returns:
+            The loaded data from the URLs.
+        Exceptions:
+            Prints an error message if loading URLs fails.
+        """
+        try:
+            loader = UnstructuredURLLoader(urls=urls)
+            data = loader.load()
+            return data
+        except Exception as e:
+            print(f"Error loading URLs: {e}")
+    def load_YouTubeVideo(self, urls):
+        """
+        Load YouTube video information from provided URLs.
+        Args:
+            urls (list): A list of YouTube video URLs.
+        Returns:
+            The loaded documents from the YouTube URLs.
+        Exceptions:
+            Prints an error message if loading YouTube videos fails.
+        """
+        try:
+            loader = YoutubeLoader.from_youtube_url(
+                urls, add_video_info=True, language=["en", "pt", "zh-Hans", "es", "ur", "hi"],
+                translation="en")
+            documents = loader.load()
+            return documents
+        except Exception as e:
+            print(f"Error loading YouTube video: {e}")
+    def load_pdf(self, path):
+        """
+        Load data from a PDF file at the specified path.
+        Args:
+            path (str): The file path to the PDF file.
+        Returns:
+            The loaded and split PDF pages.
+        Exceptions:
+            Prints an error message if the PDF loading fails.
+        """
+        try:
+            loader = PyPDFLoader(path)
+            pages = loader.load_and_split()
+            return pages
+        except Exception as e:
+            print(f"Error loading PDF: {e}")
+    def load_text_from_html(self, path):
+        """
+        Load and parse text content from an HTML file at the specified path.
+        Args:
+            path (str): The file path to the HTML file.
+        Returns:
+            The loaded HTML data.
+        Exceptions:
+            Prints an error message if loading text from HTML fails.
+        """
+        try:
+            loader = BSHTMLLoader(path)
+            data = loader.load()
+            return data
+        except Exception as e:
+            print(f"Error loading text from HTML: {e}")
+    def load_markdown(self, path):
+        """
+        Load data from a Markdown file at the specified path.
+        Args:
+            path (str): The file path to the Markdown file.
+        Returns:
+            The loaded Markdown data.
+        Exceptions:
+            Prints an error message if loading Markdown fails.
+        """
+        try:
+            loader = UnstructuredMarkdownLoader(path)
+            data = loader.load()
+            return data
+        except Exception as e:
+            print(f"Error loading Markdown: {e}")
+    def load_doc(self, path):
+        """
+        Load data from a DOCX file at the specified path.
+        Args:
+            path (str): The file path to the DOCX file.
+        Returns:
+            The loaded DOCX data.
+        Exceptions:
+            Prints an error message if loading DOCX fails.
+        """
+        try:
+            loader = Docx2txtLoader(path)
+            data = loader.load()
+            return data
+        except Exception as e:
+            print(f"Error loading DOCX: {e}")

embedding.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from langchain_huggingface import HuggingFaceEmbeddings
+def get_embeddings():
+    # Initialize HuggingFace embeddings
+    model_name = "BAAI/bge-small-en"
+    model_kwargs = {"device": "cpu"}
+    encode_kwargs = {"normalize_embeddings": True}
+    embeddings = HuggingFaceEmbeddings(
+        model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
+    )
+    return embeddings

llm_initialization.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from langchain_groq import ChatGroq
+def get_llm():
+    """
+    Returns the language model instance (LLM) using ChatGroq API.
+    The LLM used is Llama 3.1 with a versatile 70 billion parameters model.
+    Returns:
+        llm (ChatGroq): An instance of the ChatGroq LLM.
+    """
+    llm = ChatGroq(
+        model="llama-3.3-70b-versatile",
+        temperature=0,
+        max_tokens=1024,
+        api_key='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'
+    )
+    return llm

main.py ADDED Viewed

	@@ -0,0 +1,355 @@

+import os
+import tempfile
+import zipfile
+from typing import List, Optional
+from fastapi import FastAPI, File, UploadFile, HTTPException, Query
+from fastapi.responses import FileResponse, StreamingResponse
+from llm_initialization import get_llm
+from embedding import get_embeddings
+from document_loaders import DocumentLoader
+from text_splitter import TextSplitter
+from vector_store import VectorStoreManager
+from prompt_templates import PromptTemplates
+from chat_management import ChatManagement
+from retrieval_chain import RetrievalChain
+from urllib.parse import quote_plus
+from dotenv import load_dotenv
+from pymongo import MongoClient
+# Load environment variables
+load_dotenv()
+MONGO_PASSWORD = quote_plus(os.getenv("MONGO_PASSWORD"))
+MONGO_DATABASE_NAME = os.getenv("DATABASE_NAME")
+MONGO_COLLECTION_NAME = os.getenv("COLLECTION_NAME")
+MONGO_CLUSTER_URL = os.getenv("CONNECTION_STRING")
+app = FastAPI(title="VectorStore & Document Management API")
+# Global variables (initialized on startup)
+llm = None
+embeddings = None
+chat_manager = None
+document_loader = None
+text_splitter = None
+vector_store_manager = None
+vector_store = None
+k = 3  # Number of documents to retrieve per query
+# Global MongoDB collection to store retrieval chain configuration per chat session.
+chat_chains_collection = None
+# ----------------------- Startup Event -----------------------
+@app.on_event("startup")
+async def startup_event():
+    """
+    Initialize the module-level components when the application starts.
+    Fills the globals declared at the top of this module: the LLM, the embeddings, the chat
+    manager, the document loader, the text splitter, the vector store manager with its
+    vector store, and the MongoDB collection holding per-chat chain configuration.
+    Progress is reported with print statements.
+    """
+    # NOTE(review): recent FastAPI versions favor lifespan handlers over on_event — confirm
+    # against the pinned FastAPI version before migrating.
+    global llm, embeddings, chat_manager, document_loader, text_splitter, vector_store_manager, vector_store, chat_chains_collection
+    print("Starting up: Initializing components...")
+    # Initialize LLM and embeddings
+    llm = get_llm()
+    print("LLM initialized.")
+    embeddings = get_embeddings()
+    print("Embeddings initialized.")
+    # Setup chat management
+    chat_manager = ChatManagement(
+        cluster_url=MONGO_CLUSTER_URL,
+        database_name=MONGO_DATABASE_NAME,
+        collection_name=MONGO_COLLECTION_NAME,
+    )
+    print("Chat management initialized.")
+    # Initialize document loader and text splitter
+    document_loader = DocumentLoader()
+    text_splitter = TextSplitter()
+    print("Document loader and text splitter initialized.")
+    # Initialize vector store manager and keep a module-level reference to its vector store
+    vector_store_manager = VectorStoreManager(embeddings)
+    vector_store = vector_store_manager.vectorstore  # Now properly initialized
+    print("Vector store initialized.")
+    # Connect to MongoDB and get the collection.
+    # The "chat_chains" collection lives in the same database as the chat histories.
+    client = MongoClient(MONGO_CLUSTER_URL)
+    db = client[MONGO_DATABASE_NAME]
+    chat_chains_collection = db["chat_chains"]
+    print("Chat chains collection initialized in MongoDB.")
+# ----------------------- Root Endpoint -----------------------
+@app.get("/")
+def root():
+    """
+    Root endpoint that returns a welcome message.
+    """
+    return {"message": "Welcome to the VectorStore & Document Management API!"}
+# ----------------------- New Chat Endpoint -----------------------
+@app.post("/new_chat")
+def new_chat():
+    """
+    Create a new chat session.
+    """
+    new_chat_id = chat_manager.create_new_chat()
+    return {"chat_id": new_chat_id}
+# ----------------------- Create Chain Endpoint -----------------------
+@app.post("/create_chain")
+def create_chain(
+    chat_id: str = Query(..., description="Existing chat session ID"),
+    template: str = Query(
+        "quiz_solving",
+        description="Select prompt template. Options: quiz_solving, assignment_solving, paper_solving, quiz_creation, assignment_creation, paper_creation",
+    ),
+):
+    global chat_chains_collection  # Ensure we reference the global variable
+    valid_templates = [
+        "quiz_solving",
+        "assignment_solving",
+        "paper_solving",
+        "quiz_creation",
+        "assignment_creation",
+        "paper_creation",
+    ]
+    if template not in valid_templates:
+        raise HTTPException(status_code=400, detail="Invalid template selection.")
+    # Upsert the configuration document for this chat session.
+    chat_chains_collection.update_one(
+        {"chat_id": chat_id}, {"$set": {"template": template}}, upsert=True
+    )
+    return {"message": "Retrieval chain configuration stored successfully.", "chat_id": chat_id, "template": template}
+# ----------------------- Chat Endpoint -----------------------
+@app.get("/chat")
+def chat(query: str, chat_id: str = Query(..., description="Chat session ID created via /new_chat and configured via /create_chain")):
+    """
+    Process a chat query using the retrieval chain associated with the given chat_id.
+    This endpoint uses the following code:
+        try:
+            stream_generator = retrieval_chain.stream_chat_response(
+                query=query,
+                chat_id=chat_id,
+                get_chat_history=chat_manager.get_chat_history,
+                initialize_chat_history=chat_manager.initialize_chat_history,
+            )
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"Error processing chat query: {str(e)}")
+        return StreamingResponse(stream_generator, media_type="text/event-stream")
+    It first retrieves the configuration from MongoDB, re-creates the chain, and then streams the response.
+    """
+    # Retrieve the chat configuration from MongoDB.
+    config = chat_chains_collection.find_one({"chat_id": chat_id})
+    if not config:
+        raise HTTPException(status_code=400, detail="Chat configuration not found. Please create a chain using /create_chain.")
+    template = config.get("template", "quiz_solving")
+    if template == "quiz_solving":
+        prompt = PromptTemplates.get_quiz_solving_prompt()
+    elif template == "assignment_solving":
+        prompt = PromptTemplates.get_assignment_solving_prompt()
+    elif template == "paper_solving":
+        prompt = PromptTemplates.get_paper_solving_prompt()
+    elif template == "quiz_creation":
+        prompt = PromptTemplates.get_quiz_creation_prompt()
+    elif template == "assignment_creation":
+        prompt = PromptTemplates.get_assignment_creation_prompt()
+    elif template == "paper_creation":
+        prompt = PromptTemplates.get_paper_creation_prompt()
+    else:
+        raise HTTPException(status_code=400, detail="Invalid chat configuration.")
+    # Re-create the retrieval chain for this chat session.
+    retrieval_chain = RetrievalChain(
+        llm,
+        vector_store.as_retriever(search_kwargs={"k": k}),
+        prompt,
+        verbose=True,
+    )
+    try:
+        stream_generator = retrieval_chain.stream_chat_response(
+            query=query,
+            chat_id=chat_id,
+            get_chat_history=chat_manager.get_chat_history,
+            initialize_chat_history=chat_manager.initialize_chat_history,
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing chat query: {str(e)}")
+    return StreamingResponse(stream_generator, media_type="text/event-stream")
+# ----------------------- Add Document Endpoint -----------------------
+from typing import Any, Optional
+@app.post("/add_document")
+async def add_document(
+    file: Optional[Any] = File(None),
+    wiki_query: Optional[str] = Query(None),
+    wiki_url: Optional[str] = Query(None)
+):
+    """
+    Upload a document OR load data from a Wikipedia query or URL.
+    - If a file is provided, the document is loaded from the file.
+    - If 'wiki_query' is provided, the Wikipedia page(s) are loaded using document_loader.wikipedia_query.
+    - If 'wiki_url' is provided, the URL is loaded using document_loader.load_urls.
+    The loaded document(s) are then split into chunks and added to the vector store.
+    """
+    # If file is provided but not as an UploadFile (e.g. an empty string), set it to None.
+    if not isinstance(file, UploadFile):
+        file = None
+    # Ensure at least one input is provided.
+    if file is None and wiki_query is None and wiki_url is None:
+        raise HTTPException(status_code=400, detail="No document input provided (file, wiki_query, or wiki_url).")
+    # Load document(s) based on input priority: file > wiki_query > wiki_url.
+    if file is not None:
+        with tempfile.NamedTemporaryFile(delete=False) as tmp:
+            contents = await file.read()
+            tmp.write(contents)
+            tmp_filename = tmp.name
+        ext = file.filename.split(".")[-1].lower()
+        try:
+            if ext == "pdf":
+                documents = document_loader.load_pdf(tmp_filename)
+            elif ext == "csv":
+                documents = document_loader.load_csv(tmp_filename)
+            elif ext in ["doc", "docx"]:
+                documents = document_loader.load_doc(tmp_filename)
+            elif ext in ["html", "htm"]:
+                documents = document_loader.load_text_from_html(tmp_filename)
+            elif ext in ["md", "markdown"]:
+                documents = document_loader.load_markdown(tmp_filename)
+            else:
+                documents = document_loader.load_unstructured(tmp_filename)
+        except Exception as e:
+            os.remove(tmp_filename)
+            raise HTTPException(status_code=400, detail=f"Error loading document from file: {str(e)}")
+        os.remove(tmp_filename)
+    elif wiki_query is not None:
+        try:
+            documents = document_loader.wikipedia_query(wiki_query)
+        except Exception as e:
+            raise HTTPException(status_code=400, detail=f"Error loading Wikipedia query: {str(e)}")
+    elif wiki_url is not None:
+        try:
+            documents = document_loader.load_urls([wiki_url])
+        except Exception as e:
+            raise HTTPException(status_code=400, detail=f"Error loading URL: {str(e)}")
+    try:
+        chunks = text_splitter.split_documents(documents)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error splitting document: {str(e)}")
+    try:
+        ids = vector_store_manager.add_documents(chunks)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error indexing document chunks: {str(e)}")
+    return {"message": f"Added {len(chunks)} document chunks.", "ids": ids}
+# ----------------------- Delete Document Endpoint -----------------------
+@app.post("/delete_document")
+def delete_document(ids: List[str]):
+    """
+    Delete document(s) from the vector store using their IDs.
+    """
+    try:
+        success = vector_store_manager.delete_documents(ids)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error deleting documents: {str(e)}")
+    if not success:
+        raise HTTPException(status_code=400, detail="Failed to delete documents.")
+    return {"message": f"Deleted documents with IDs: {ids}"}
+# ----------------------- Save Vectorstore Endpoint -----------------------
+@app.get("/save_vectorstore")
+def save_vectorstore():
+    """
+    Save the current vector store locally.
+    If it is a directory, it will be zipped.
+    Returns the file as a downloadable response.
+    """
+    try:
+        save_result = vector_store_manager.save("faiss_index")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error saving vectorstore: {str(e)}")
+    return FileResponse(
+        path=save_result["file_path"],
+        media_type=save_result["media_type"],
+        filename=save_result["serve_filename"],
+    )
+# ----------------------- Load Vectorstore Endpoint -----------------------
+@app.post("/load_vectorstore")
+async def load_vectorstore(file: UploadFile = File(...)):
+    """
+    Load a vector store from an uploaded file (raw or zipped).
+    This will replace the current vector store.
+    """
+    tmp_filename = None
+    try:
+        # Save the uploaded file content to a temporary file.
+        with tempfile.NamedTemporaryFile(delete=False) as tmp:
+            file_bytes = await file.read()  # await to get bytes
+            tmp.write(file_bytes)
+            tmp_filename = tmp.name
+        instance, message = VectorStoreManager.load(tmp_filename, embeddings)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error loading vectorstore: {str(e)}")
+    finally:
+        if tmp_filename and os.path.exists(tmp_filename):
+            os.remove(tmp_filename)
+    global vector_store_manager
+    vector_store_manager = instance
+    return {"message": message}
+# ----------------------- Merge Vectorstore Endpoint -----------------------
+@app.post("/merge_vectorstore")
+async def merge_vectorstore(file: UploadFile = File(...)):
+    """
+    Merge an uploaded vector store (raw or zipped) into the current vector store.
+    """
+    tmp_filename = None
+    try:
+        # Save the uploaded file content to a temporary file.
+        with tempfile.NamedTemporaryFile(delete=False) as tmp:
+            file_bytes = await file.read()  # Await the file.read() coroutine!
+            tmp.write(file_bytes)
+            tmp_filename = tmp.name
+        # Pass the filename (a string) to the merge method.
+        result = vector_store_manager.merge(tmp_filename, embeddings)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error merging vectorstore: {str(e)}")
+    finally:
+        if tmp_filename and os.path.exists(tmp_filename):
+            os.remove(tmp_filename)
+    return result
+# Entry point for running this file directly: serves the app on all interfaces at port
+# 8000. Note that the Dockerfile's CMD starts uvicorn on port 7860 instead.
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

prompt_templates.py ADDED Viewed

	@@ -0,0 +1,242 @@

+from langchain.prompts import ChatPromptTemplate
+class PromptTemplates:
+    """
+    A class to encapsulate various prompt templates for solving assignments, papers, creating quizzes, and assignments.
+    """
+    @staticmethod
+    def get_quiz_solving_prompt():
+        quiz_solving_prompt = '''
+        You are an assistant specialized in solving quizzes. Your goal is to provide accurate, concise, and contextually relevant answers.
+        Use the following retrieved context to answer the user's question.
+        If the context lacks sufficient information, respond with "I don't know." Do not make up answers or provide unverified information.
+        Guidelines:
+        1. Extract key information from the context to form a coherent response.
+        2. Maintain a clear and professional tone.
+        3. If the question requires clarification, specify it politely.
+        Retrieved context:
+        {context}
+        User's question:
+        {question}
+        Your response:
+        '''
+        # Create a prompt template to pass the context and user input to the chain
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", quiz_solving_prompt),
+                ("human", "{question}"),
+            ]
+        )
+        return prompt
+    @staticmethod
+    def get_assignment_solving_prompt():
+        # Prompt template for solving assignments
+        assignment_solving_prompt = '''
+        You are an expert assistant specializing in solving academic assignments with clarity and precision.
+        Your task is to provide step-by-step solutions and detailed explanations that align with the given requirements.
+        Retrieved context:
+        {context}
+        Assignment Details:
+        {question}
+        Guidelines:
+        1. **Understand the Problem:** Carefully analyze the assignment details to identify the objective and requirements.
+        2. **Provide a Step-by-Step Solution:** Break down the solution into clear, logical steps. Use examples where appropriate.
+        3. **Explain Your Reasoning:** Include concise explanations for each step to enhance understanding.
+        4. **Follow Formatting Rules:** Ensure the response matches any specified formatting or citation guidelines.
+        5. **Maintain Academic Integrity:** Do not fabricate information, copy content verbatim without attribution, or complete the task in a way that breaches academic honesty policies.
+        Deliverable:
+        Provide the final answer in the format outlined in the assignment description. Where relevant, include:
+        - A brief introduction summarizing the approach.
+        - Calculations or code (if applicable).
+        - Any necessary diagrams, tables, or figures (use textual descriptions for diagrams if unavailable).
+        - A conclusion summarizing the findings.
+        If the assignment details are incomplete or ambiguous, specify what additional information is required to proceed.
+        Assignment Response:
+        '''
+        # Create a prompt template to pass the context and user input to the chain
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", assignment_solving_prompt),
+                ("human", "{question}"),
+            ]
+        )
+        return prompt
+    @staticmethod
+    def get_paper_solving_prompt():
+        # Prompt template for solving papers
+        paper_solving_prompt = '''
+        You are an expert assistant specialized in solving academic papers with precision and clarity.
+        Your task is to provide well-structured answers to the questions in the paper, ensuring accuracy, depth, and adherence to any specified instructions.
+        Retrieved context:
+        {context}
+        Paper Information:
+        {question}
+        Instructions:
+        1. **Understand Each Question:** Read each question carefully and identify its requirements, keywords, and scope.
+        2. **Structured Responses:** Provide answers in a clear, logical structure (e.g., Introduction, Body, Conclusion).
+        3. **Depth and Accuracy:** Support answers with explanations, examples, calculations, or references where applicable.
+        4. **Formatting Guidelines:** Adhere to any specified format or style (e.g., bullet points, paragraphs, equations).
+        5. **Time Efficiency:** If the paper is timed, prioritize accuracy and completeness over excessive detail.
+        6. **Clarify Ambiguities:** If any question is unclear, mention the assumptions made while answering.
+        7. **Ethical Guidelines:** Ensure the answers are original and aligned with academic integrity standards.
+        Deliverables:
+        - Answer all questions to the best of your ability.
+        - Include relevant diagrams, tables, or code (describe diagrams in text if unavailable).
+        - Summarize key points in a conclusion where applicable.
+        - Clearly number and label answers to match the questions in the paper.
+        If the paper includes multiple sections, label each section and solve sequentially.
+        Paper Solution:
+        '''
+        # Create a prompt template to pass the context and user input to the chain
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", paper_solving_prompt),
+                ("human", "{question}"),
+            ]
+        )
+        return prompt
+    @staticmethod
+    def get_quiz_creation_prompt():
+        # Prompt template for creating a quiz
+        quiz_creation_prompt = '''
+        You are an expert assistant specializing in creating engaging and educational quizzes for students.
+        Your task is to design a quiz based on the topic, difficulty level, and format specified by the teacher.
+        Retrieved context:
+        {context}
+        Quiz Details:
+        Topic: {question}
+        Guidelines for Quiz Creation:
+        1. **Relevance to Topic:** Ensure all questions are directly related to the specified topic.
+        2. **Clear and Concise Wording:** Write questions clearly and concisely to avoid ambiguity.
+        3. **Diverse Question Types:** Incorporate a variety of question types if specified.
+        4. **Appropriate Difficulty:** Tailor the complexity of the questions to match the target audience and difficulty level.
+        5. **Answer Key:** Provide correct answers or explanations for each question.
+        Deliverables:
+        - A complete quiz with numbered questions.
+        - An answer key with correct answers and explanations where relevant.
+        Quiz:
+        '''
+        # Create a prompt template to pass the context and user input to the chain
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", quiz_creation_prompt),
+                ("human", "{question}"),
+            ]
+        )
+        return prompt
+    @staticmethod
+    def get_assignment_creation_prompt():
+        # Prompt template for creating an assignment
+        assignment_creation_prompt = '''
+        You are an expert assistant specializing in designing assignments that align with the educational goals and requirements of teachers.
+        Your task is to create a comprehensive assignment based on the provided topic, target audience, and desired outcomes.
+        Retrieved context:
+        {context}
+        Assignment Details:
+        Topic: {question}
+        Guidelines for Assignment Creation:
+        1. **Alignment with Topic:** Ensure all tasks/questions are closely related to the specified topic and designed to achieve the teacher’s learning objectives.
+        2. **Clear Instructions:** Provide detailed and clear instructions for each question or task.
+        3. **Encourage Critical Thinking:** Include questions or tasks that require analysis, creativity, and application of knowledge where appropriate.
+        4. **Variety of Tasks:** Incorporate diverse question types (e.g., short answers, essays, practical tasks) as per the specified format.
+        5. **Grading Rubric (Optional):** Include a grading rubric or evaluation criteria if specified in the instructions.
+        Deliverables:
+        - A detailed assignment with numbered tasks/questions.
+        - Any required supporting materials (e.g., diagrams, data tables, references).
+        - (Optional) A grading rubric or expected outcomes for each task.
+        Assignment:
+        '''
+        # Create a prompt template to pass the context and user input to the chain
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", assignment_creation_prompt),
+                ("human", "{question}"),
+            ]
+        )
+        return prompt
+    @staticmethod
+    def get_paper_creation_prompt():
+        # Prompt template for creating an academic paper
+        paper_creation_prompt = '''
+        You are an expert assistant specializing in designing comprehensive academic papers tailored to the educational goals and requirements of teachers.
+        Your task is to create a complete paper based on the specified topic, audience, format, and difficulty level.
+        Retrieved context:
+        {context}
+        Paper Details:
+        Subject/Topic: {question}
+        Guidelines for Paper Creation:
+        1. **Relevance and Alignment:** Ensure all questions align with the specified subject/topic and are tailored to the target audience’s curriculum or learning objectives.
+        2. **Clear Wording:** Write questions in clear, concise language to avoid ambiguity or confusion.
+        3. **Diverse Question Types:** Incorporate a variety of question formats as specified (e.g., multiple-choice, fill-in-the-blank, long-form essays).
+        4. **Grading and Marks Allocation:** Provide a suggested mark allocation for each question, ensuring it reflects the question's complexity and time required.
+        5. **Answer Key:** Include correct answers or model responses for objective and descriptive questions (optional).
+        Deliverables:
+        - A complete paper with numbered questions, organized by sections if required.
+        - An answer key or marking scheme (if requested).
+        - Any supporting materials (e.g., diagrams, charts, or data tables) if applicable.
+        Paper:
+        '''
+        # Create a prompt template to pass the context and user input to the chain
+        prompt = ChatPromptTemplate.from_messages(
+            [
+                ("system", paper_creation_prompt),
+                ("human", "{question}"),
+            ]
+        )
+        return prompt

requirements.txt ADDED Viewed

	@@ -0,0 +1,23 @@

+fastapi
+# python-jose
+python-dotenv
+# bcrypt
+# passlib
+uvicorn
+# pyjwt
+python-multipart
+# pydantic[email]
+pymongo
+faiss-cpu
+sentence_transformers
+langchain_groq
+langchain-community
+langchain_unstructured
+unstructured[all-docs]
+unstructured[docx]
+unstructured
+unstructured[pdf]
+langchain-mongodb
+langchain_huggingface
+wikipedia
+docx2txt

retrieval_chain.py ADDED Viewed

	@@ -0,0 +1,110 @@

+from langchain.chains import ConversationalRetrievalChain
+from langchain.prompts import ChatPromptTemplate
+class RetrievalChain:
+    def __init__(self, llm, retriever, user_prompt, verbose=False):
+        """
+        Initializes the RetrievalChain with an LLM and retriever.
+        Args:
+            llm: Language model to use for the conversational chain.
+            retriever: Retriever object to fetch relevant documents.
+            user_prompt: Custom prompt to guide the chain.
+            verbose (bool): Whether to print verbose chain outputs.
+        """
+        self.llm = llm
+        self.chain = ConversationalRetrievalChain.from_llm(
+            llm=llm,
+            retriever=retriever,
+            return_source_documents=True,
+            chain_type='stuff',
+            combine_docs_chain_kwargs={"prompt": user_prompt},
+            verbose=verbose,
+        )
+    def summarize_messages(self, chat_history):
+        """
+        Summarizes the chat history into a concise message.
+        Args:
+            chat_history: The chat history object for the session.
+        Returns:
+            bool: True if summarization is successful, False otherwise.
+        """
+        stored_messages = chat_history.messages
+        if len(stored_messages) == 0:
+            return False
+        summarization_prompt = ChatPromptTemplate.from_messages(
+            [
+                ("placeholder", "{chat_history}"),
+                (
+                    "human",
+                    "Summarize the above chat messages into a single concise message. Include only the important specific details.",
+                ),
+            ]
+        )
+        # Create a chain for summarization by piping the prompt into the language model.
+        summarization_chain = summarization_prompt | self.llm
+        summary_message = summarization_chain.invoke({"chat_history": stored_messages})
+        chat_history.clear()  # Clear the existing chat history
+        chat_history.add_ai_message(summary_message.content)  # Add the summary message as the first entry
+        return True
+    def stream_chat_response(self, query, chat_id, get_chat_history, initialize_chat_history):
+        """
+        Streams the response to a query in real-time for a given chat session using SSE formatting.
+        Args:
+            query (str): The user's query.
+            chat_id (str): The unique ID of the chat session.
+            get_chat_history (function): Function to retrieve chat history by chat ID.
+            initialize_chat_history (function): Function to initialize a new chat history.
+        Yields:
+            str: Server-Sent Event (SSE) formatted string for each chunk of the response.
+        """
+        # Retrieve the chat history for the session.
+        chat_message_history = get_chat_history(chat_id)
+        if not chat_message_history:
+            # If no chat history exists, initialize one.
+            chat_message_history = initialize_chat_history(chat_id)
+        # Optionally summarize previous messages.
+        self.summarize_messages(chat_message_history)
+        chat_history = chat_message_history.messages
+        # Prepare input data for the conversational retrieval chain.
+        input_data_for_chain = {
+            "question": query,
+            "chat_history": chat_history
+        }
+        # Add the user query to the chat history.
+        chat_message_history.add_user_message(query)
+        # Execute the chain in streaming mode (this assumes the chain supports a `stream` method).
+        response_stream = self.chain.stream(input_data_for_chain)
+        accumulated_response = ""
+        # Process the response stream and yield SSE events.
+        for chunk in response_stream:
+            if 'answer' in chunk:
+                accumulated_response += chunk['answer']
+                # Format the SSE event.
+                sse_event = f"data: {chunk['answer']}\n\n"
+                yield sse_event
+            else:
+                # Yield an SSE event with debug info if the chunk structure is unexpected.
+                debug_msg = f"Unexpected chunk structure: {chunk}"
+                yield f"data: {debug_msg}\n\n"
+        # Once streaming is complete, update chat history with the final response.
+        if accumulated_response:
+            chat_message_history.add_ai_message(accumulated_response)
+        else:
+            yield "data: No valid response content was generated.\n\n"

text_splitter.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from langchain_text_splitters import RecursiveCharacterTextSplitter
+class TextSplitter:
+    def __init__(self, chunk_size=1024, chunk_overlap=100):
+        """
+        Initialize the TextSplitter with a specific chunk size and overlap.
+        Args:
+            chunk_size (int): The size of each text chunk.
+            chunk_overlap (int): The overlap size between chunks.
+        """
+        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    def split_documents(self, documents):
+        """
+        Split the provided documents into chunks based on the chunk size and overlap.
+        Args:
+            documents (list): A list of documents to be split.
+        Returns:
+            A list of split documents.
+        Exceptions:
+            Prints an error message if splitting documents fails.
+        """
+        try:
+            return self.text_splitter.split_documents(documents)
+        except Exception as e:
+            print(f"Error splitting documents: {e}")

vector_store.py ADDED Viewed

	@@ -0,0 +1,234 @@

+import os
+import uuid
+import shutil
+import tempfile
+import zipfile
+from faiss import IndexFlatL2
+from langchain_community.vectorstores import FAISS
+from langchain_community.docstore.in_memory import InMemoryDocstore
+class VectorStoreManager:
+    def __init__(self, embeddings=None):
+        """
+        Initializes the VectorStoreManager with a FAISS vector store.
+        Args:
+            embeddings (Embeddings, optional): Embeddings model used for the vector store.
+        """
+        self.vectorstore = None
+        if embeddings:
+            self.vectorstore = self.create_vectorstore(embeddings)
+    def create_vectorstore(self, embeddings):
+        """
+        Creates and initializes a FAISS vector store.
+        Args:
+            embeddings (Embeddings): Embeddings model used for the vector store.
+        Returns:
+            FAISS: Initialized vector store.
+        """
+        # Define vector store dimensions based on embeddings
+        dimensions = len(embeddings.embed_query("dummy"))
+        # Initialize FAISS vector store
+        vectorstore = FAISS(
+            embedding_function=embeddings,
+            index=IndexFlatL2(dimensions),
+            docstore=InMemoryDocstore(),
+            index_to_docstore_id={},
+            normalize_L2=False
+        )
+        print("Created a new FAISS vector store.")
+        return vectorstore
+    def add_documents(self, documents):
+        """
+        Adds new documents to the FAISS vector store, each document with a unique UUID.
+        Args:
+            documents (list): List of Document objects to be added to the vector store.
+        Returns:
+            list: List of UUIDs corresponding to the added documents.
+        """
+        if not self.vectorstore:
+            raise ValueError("Vector store is not initialized. Please create or load a vector store first.")
+        uuids = [str(uuid.uuid4()) for _ in range(len(documents))]
+        self.vectorstore.add_documents(documents=documents, ids=uuids)
+        print(f"Added {len(documents)} documents to the vector store with IDs: {uuids}")
+        return uuids
+    def delete_documents(self, ids):
+        """
+        Deletes documents from the FAISS vector store using their unique IDs.
+        Args:
+            ids (list): List of UUIDs corresponding to the documents to be deleted.
+        Returns:
+            bool: True if the documents were successfully deleted, False otherwise.
+        """
+        if not self.vectorstore:
+            raise ValueError("Vector store is not initialized. Please create or load a vector store first.")
+        if not ids:
+            print("No document IDs provided for deletion.")
+            return False
+        success = self.vectorstore.delete(ids=ids)
+        if success:
+            print(f"Successfully deleted documents with IDs: {ids}")
+        else:
+            print(f"Failed to delete documents with IDs: {ids}")
+        return success
+    def save(self, filename="faiss_index"):
+        """
+        Saves the current FAISS vector store locally. If the saved store is a directory,
+        it compresses it into a ZIP archive.
+        Args:
+            filename (str): The filename or directory name where the vector store will be saved.
+        Returns:
+            dict: A dictionary with details about the saved file including file path and media type.
+        """
+        if not self.vectorstore:
+            raise ValueError("Vector store is not initialized. Please create or load a vector store first.")
+        # Save the vectorstore locally
+        self.vectorstore.save_local(filename)
+        print(f"Vector store saved to {filename}")
+        if not os.path.exists(filename):
+            raise FileNotFoundError("Saved vectorstore not found.")
+        # If the saved vectorstore is a directory, compress it into a zip file.
+        if os.path.isdir(filename):
+            zip_filename = filename + ".zip"
+            shutil.make_archive(filename, 'zip', filename)
+            return {
+                "file_path": zip_filename,
+                "media_type": "application/zip",
+                "serve_filename": os.path.basename(zip_filename),
+                "original": filename,
+            }
+        else:
+            return {
+                "file_path": filename,
+                "media_type": "application/octet-stream",
+                "serve_filename": os.path.basename(filename),
+                "original": filename,
+            }
+    @staticmethod
+    def load(file_input, embeddings):
+        """
+        Loads a FAISS vector store from an uploaded file or a filename.
+        If file_input is a file-like object, it is saved to a temporary file.
+        If it's a string (filename), it is used directly.
+        """
+        # Check if file_input is a string (filename) or a file-like object.
+        if isinstance(file_input, str):
+            tmp_filename = file_input
+        else:
+            with tempfile.NamedTemporaryFile(delete=False) as tmp:
+                tmp.write(file_input.read())
+                tmp_filename = tmp.name
+        try:
+            if zipfile.is_zipfile(tmp_filename):
+                with tempfile.TemporaryDirectory() as extract_dir:
+                    with zipfile.ZipFile(tmp_filename, 'r') as zip_ref:
+                        zip_ref.extractall(extract_dir)
+                    extracted_items = os.listdir(extract_dir)
+                    if len(extracted_items) == 1:
+                        potential_dir = os.path.join(extract_dir, extracted_items[0])
+                        if os.path.isdir(potential_dir):
+                            vectorstore_dir = potential_dir
+                        else:
+                            vectorstore_dir = extract_dir
+                    else:
+                        vectorstore_dir = extract_dir
+                    new_vectorstore = FAISS.load_local(vectorstore_dir, embeddings, allow_dangerous_deserialization=True)
+                message = "Vector store loaded successfully from ZIP."
+            else:
+                new_vectorstore = FAISS.load_local(tmp_filename, embeddings, allow_dangerous_deserialization=True)
+                message = "Vector store loaded successfully."
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"Error loading vectorstore: {str(e)}")
+        finally:
+            # Only remove the temp file if we created it here (i.e. file_input was not a filename)
+            if not isinstance(file_input, str) and os.path.exists(tmp_filename):
+                os.remove(tmp_filename)
+        instance = VectorStoreManager()
+        instance.vectorstore = new_vectorstore
+        print(message)
+        return instance, message
+    def merge(self, file_input, embeddings):
+        """
+        Merges an uploaded vector store file into the current FAISS vector store.
+        Args:
+            file_input (Union[file-like object, str]): An object with a .read() method or a filename (str).
+            embeddings (Embeddings): Embeddings model used for loading the vector store.
+        Returns:
+            dict: A dictionary containing a message indicating successful merging.
+        """
+        # Determine if file_input is a filename (str) or a file-like object.
+        if isinstance(file_input, str):
+            tmp_filename = file_input
+            temp_created = False
+        else:
+            with tempfile.NamedTemporaryFile(delete=False) as tmp:
+                tmp.write(file_input.read())
+                tmp_filename = tmp.name
+            temp_created = True
+        try:
+            # Check if the file is a ZIP archive.
+            if zipfile.is_zipfile(tmp_filename):
+                with tempfile.TemporaryDirectory() as extract_dir:
+                    with zipfile.ZipFile(tmp_filename, 'r') as zip_ref:
+                        zip_ref.extractall(extract_dir)
+                    extracted_items = os.listdir(extract_dir)
+                    if len(extracted_items) == 1:
+                        potential_dir = os.path.join(extract_dir, extracted_items[0])
+                        if os.path.isdir(potential_dir):
+                            vectorstore_dir = potential_dir
+                        else:
+                            vectorstore_dir = extract_dir
+                    else:
+                        vectorstore_dir = extract_dir
+                    source_store = FAISS.load_local(
+                        vectorstore_dir, embeddings, allow_dangerous_deserialization=True
+                    )
+            else:
+                source_store = FAISS.load_local(
+                    tmp_filename, embeddings, allow_dangerous_deserialization=True
+                )
+            if not self.vectorstore:
+                raise ValueError("Vector store is not initialized. Please create or load a vector store first.")
+            self.vectorstore.merge_from(source_store)
+            print("Successfully merged the source vector store into the current vector store.")
+        except Exception as e:
+            raise Exception(f"Error merging vectorstore: {str(e)}")
+        finally:
+            if temp_created and os.path.exists(tmp_filename):
+                os.remove(tmp_filename)
+        return {"message": "Vector stores merged successfully"}