Spaces:

duythduong
/

fpt-chat

Sleeping

App Files Files Community

duythduong committed on Jul 12, 2024

Commit

d45043b

1 Parent(s): 72bf066

feat: use firebase for documents, add readme

Browse files

Files changed (11) hide show

README.md +81 -0
agent.py +0 -16
apis/v1/configs/firebase_config.py +30 -0
apis/v1/configs/{llm_configs.py → llm_config.py} +0 -0
apis/v1/controllers/document_controller.py +41 -37
apis/v1/controllers/rag_controller.py +1 -1
apis/v1/providers/__init__.py +3 -1
apis/v1/providers/firebase_provider.py +87 -0
apis/v1/routes/documents.py +25 -24
app.py +2 -0
test.py +0 -54

README.md CHANGED Viewed

	@@ -0,0 +1,81 @@

+# RAG
+This project provides a web interface to upload PDF documents and ask questions about their content. It uses a FastAPI backend to process the documents and provide summarized responses, and a Streamlit frontend to create an interactive user interface.
+## Table of Contents
+- [Installation](#installation)
+- [Usage](#usage)
+- [Contributing](#contributing)
+- [License](#license)
+## Installation
+### Prerequisites
+- Python 3.7+
+- `pip` (Python package installer)
+### Steps
+1. Clone the repository:
+```bash
+git clone https://github.com/dtduy77/RAG.git
+cd RAG
+```
+2. Create and activate a virtual environment:
+```bash
+python -m venv env
+source env/bin/activate  # On Windows use `env\Scripts\activate`
+```
+3. Install the required dependencies:
+```bash
+pip install -r requirements.txt
+```
+4. Run the FastAPI server:
+```bash
+uvicorn main:app --reload
+```
+5. Run the Streamlit app in another terminal:
+```bash
+streamlit run app.py
+```
+## Usage
+1. Open your web browser and go to http://localhost:8501.
+2. Use the file uploader to select a PDF document.
+3. Enter a question related to the content of the document.
+4. Click on "Get Summary" to receive a summarized response.
+## Contributing
+1. Fork the repository.
+2. Create a new branch (`git checkout -b feature/your-feature`).
+3. Make your changes.
+4. Commit your changes (`git commit -m 'Add your message'`).
+5. Push to the branch (`git push origin feature/your-feature`).
+6. Open a Pull Request.
+## License
+This project is open-source.
+Feel free to contribute and enhance the functionality!

agent.py DELETED Viewed

@@ -1,16 +0,0 @@
-from langchain_openai import ChatOpenAI
-from langchain import hub
-from langchain.agents import create_tool_calling_agent
-from langchain.agents import AgentExecutor
-llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
-# Get the prompt to use - you can modify this!
-prompt = hub.pull("hwchase17/openai-functions-agent")
-# prompt.messages
-agent = create_tool_calling_agent(llm, tools, prompt)
-agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

apis/v1/configs/firebase_config.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import firebase_admin
+from firebase_admin import credentials, firestore
+import os
+from dotenv import load_dotenv
+load_dotenv()
+# get credentials from .env
+credential_firebase = {
+    "type": os.getenv("TYPE"),
+    "project_id": os.getenv("PROJECT_ID"),
+    "private_key_id": os.getenv("PRIVATE_KEY_ID"),
+    "private_key": os.getenv("PRIVATE_KEY").replace('\\n', '\n'),
+    "client_email": os.getenv("CLIENT_EMAIL"),
+    "client_id": os.getenv("CLIENT_ID"),
+    "auth_uri": os.getenv("AUTH_URI"),
+    "token_uri": os.getenv("TOKEN_URI"),
+    "auth_provider_x509_cert_url": os.getenv("AUTH_PROVIDER_X509_CERT_URL"),
+    "client_x509_cert_url": os.getenv("CLIENT_X509_CERT_URL"),
+    "universe_domain": os.getenv("UNIVERSE_DOMAIN")
+}
+# check if firebase is not initialized
+if not firebase_admin._apps:
+    # Initialize the app with a service account, granting admin privileges
+    cred = credentials.Certificate(credential_firebase)
+    app = firebase_admin.initialize_app(cred)
+# Initialize Firestore
+db = firestore.client()
+print("Database connected")

apis/v1/configs/{llm_configs.py → llm_config.py} RENAMED Viewed

File without changes

apis/v1/controllers/document_controller.py CHANGED Viewed

@@ -2,42 +2,46 @@ from typing import AnyStr
 from fastapi import UploadFile, HTTPException, status, BackgroundTasks
 import uuid
 import time
 from ..schemas.document_schema import DocSchema
-# def get_all_docs():
-#     '''
-#     Get all the documents from the database.
-#     '''
-#     return Document.objects.all()
-# def process_doc(file_path: AnyStr):
-#     '''
-#     Process a document.
-#     '''
-#     return splits
-# def _upload_docs(filename: AnyStr, doc: DocSchema):
-#     '''
-#     Get content type of file.
-#     '''
-#     # Get content type of file
-#     content_type = get_content_type(filename)
-#     path, url = storage_db.upload(data, filename, content_type)
-#     cv.update_path_url(path, url)
-#     return
-# def update_docs():
-#     '''
-#     Update a document in the database.
-#     '''
-#     return Document.objects.update()
-# def delete_docs():
-#     '''
-#     Delete a document from the database.
-#     '''
-#     return Document.objects.delete()
-# async def upload_doc()

 from fastapi import UploadFile, HTTPException, status, BackgroundTasks
 import uuid
 import time
+from ..providers import firebase_db
 from ..schemas.document_schema import DocSchema
+def upload_document(data):
+    """
+    Upload a document
+    """
+    try:
+        upload_document = firebase_db.upload_doc(data)
+        return upload_document
+    except Exception as e:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+def get_document(document_id: AnyStr):
+    """
+    Get a document
+    """
+    try:
+        document = firebase_db.get_doc(document_id)
+        return document
+    except Exception as e:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+def update_document(document_id: AnyStr, data):
+    """
+    Update a document
+    """
+    try:
+        update_document = firebase_db.update_doc(document_id, data)
+        return update_document
+    except Exception as e:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+def delete_document(document_id: AnyStr):
+    """
+    Delete a document
+    """
+    try:
+        delete_document = firebase_db.delete_doc(document_id)
+        return delete_document
+    except Exception as e:
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))

apis/v1/controllers/rag_controller.py CHANGED Viewed

@@ -4,7 +4,7 @@ from langchain_core.runnables import RunnablePassthrough
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.prompts import PromptTemplate
 from langchain_pinecone import PineconeVectorStore
-from ..configs.llm_configs import gemini_model as llm
 from ..configs.word_embedding_config import mxbai_embedder
 from ..controllers.vectorstore_controller import create_vector_store
 from ..utils.prompts import rag_prompt

 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.prompts import PromptTemplate
 from langchain_pinecone import PineconeVectorStore
+from ..configs.llm_config import gemini_model as llm
 from ..configs.word_embedding_config import mxbai_embedder
 from ..controllers.vectorstore_controller import create_vector_store
 from ..utils.prompts import rag_prompt

apis/v1/providers/__init__.py CHANGED Viewed

@@ -1,3 +1,5 @@
 from .vectorstore_provider import VectorStoreProvider
-vectorstore_db = VectorStoreProvider()

 from .vectorstore_provider import VectorStoreProvider
+from .firebase_provider import FirebaseProvider
+# Provider instances created once when this package is imported; other
+# modules import these names (e.g. ``from ..providers import firebase_db``)
+# rather than constructing their own.
+vectorstore_db = VectorStoreProvider()
+firebase_db = FirebaseProvider()

apis/v1/providers/firebase_provider.py ADDED Viewed

	@@ -0,0 +1,87 @@

+from ..configs.firebase_config import db
+class FirebaseProvider:
+    def __init__(self):
+        self.collection_name = "Documents"
+        self.db = db
+    def upload_doc(self, data):
+        """
+        Uploads a document to Firestore.
+        :param collection_name: Name of the Firestore collection
+        :param data: Dictionary containing the document data
+        :return: document is successfully uploaded, error otherwise
+        """
+        try:
+            self.db.collection(self.collection_name).add(data)
+            return f"Document uploaded successfully to collection {self.collection_name}."
+        except Exception as e:
+            return (f"An error occurred: {e}")
+    def get_doc(self, document_id):
+        """
+        Retrieves a document from Firestore by collection name and document ID.
+        :param collection_name: Name of the Firestore collection
+        :param document_id: ID of the Firestore document
+        :return: Dictionary containing the document data or None if document is not found
+        """
+        try:
+            doc_ref = self.db.collection(self.collection_name).document(document_id)
+            doc = doc_ref.get()
+            if doc.exists:
+                print(f"Document with ID {document_id} retrieved successfully from collection {self.collection_name}.")
+                return doc.to_dict()
+            else:
+                print(f"No document found with ID {document_id} in collection {self.collection_name}.")
+                return None
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return None
+    def delete_doc(self, document_id):
+        """
+        Deletes a document from Firestore by collection name and document ID.
+        :param collection_name: Name of the Firestore collection
+        :param document_id: ID of the Firestore document
+        :return: document is successfully deleted, False otherwise
+        """
+        try:
+            doc_ref = self.db.collection(self.collection_name).document(document_id)
+            doc = doc_ref.get()
+            if doc.exists:
+                doc_ref.delete()
+                return f"Document with ID {document_id} deleted successfully from collection {self.collection_name}."
+            else:
+                print(f"No document found with ID {document_id} in collection {self.collection_name}.")
+                return False
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return False
+    def update_doc(self, document_id, data):
+        """
+        Updates a document in Firestore by collection name and document ID.
+        :param collection_name: Name of the Firestore collection
+        :param document_id: ID of the Firestore document
+        :param data: Dictionary containing the updated document data
+        :return: document is successfully updated, error otherwise
+        """
+        try:
+            doc_ref = self.db.collection(self.collection_name).document(document_id)
+            doc = doc_ref.get()
+            if doc.exists:
+                doc_ref.update(data)
+                return f"Document with ID {document_id} updated successfully in collection {self.collection_name}."
+            else:
+                print(f"No document found with ID {document_id} in collection {self.collection_name}.")
+                return False
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return False

apis/v1/routes/documents.py CHANGED Viewed

@@ -2,40 +2,41 @@ from typing import Annotated
 from io import BytesIO
 from pydantic import BaseModel, Field
 from fastapi import APIRouter, Depends, BackgroundTasks
 from ..interfaces.document_interface import DocumentUploadResponseInterface
 from ..utils.response_fmt import jsonResponseFmt
 router = APIRouter(prefix="/documents", tags=["Documents"])
-# @router.get("/{document_id}", response_model=DocumentResponse)
-# async def get_document(document_id: str):
-#     """
-#     Get a document
-#     """
-#     return {"document_id": document_id}
 @router.post("/upload", response_model=DocumentUploadResponseInterface)
-async def upload_document():
     """
     Upload a document
     """
-    return jsonResponseFmt(None,"Document uploaded successfully")
-# @router.update("/{document_id}", response_model=DocumentResponse)
-# async def update_document(document_id: str):
-#     """
-#     Update a document
-#     """
-#     return {"document_id": document_id}
-# @router.delete("/{document_id}")
-# async def delete_document(document_id: str):
-#     """
-#     Delete a document
-#     """
-#     return {"document_id": document_id}

 from io import BytesIO
 from pydantic import BaseModel, Field
 from fastapi import APIRouter, Depends, BackgroundTasks
+from ..controllers.document_controller import upload_document, get_document, update_document, delete_document
 from ..interfaces.document_interface import DocumentUploadResponseInterface
 from ..utils.response_fmt import jsonResponseFmt
 router = APIRouter(prefix="/documents", tags=["Documents"])
+@router.get("/{document_id}", response_model=DocumentUploadResponseInterface)
+async def get_doc(document_id: str):
+    """
+    Get a document
+    """
+    document = get_document(document_id)
+    return jsonResponseFmt(document,"Document retrieved successfully")
 @router.post("/upload", response_model=DocumentUploadResponseInterface)
+async def upload_doc(data: dict):
     """
     Upload a document
     """
+    document = upload_document(data)
+    return jsonResponseFmt(document,"Document uploaded successfully")
+@router.put("/{document_id}", response_model=DocumentUploadResponseInterface)
+async def update_doc(document_id: str, data: dict):
+    """
+    Update a document
+    """
+    document = update_document(document_id, data)
+    return jsonResponseFmt(document,"Document updated successfully")
+@router.delete("/{document_id}", response_model=DocumentUploadResponseInterface)
+async def delete_doc(document_id: str):
+    """
+    Delete a document
+    """
+    document = delete_document(document_id)
+    return jsonResponseFmt(document,"Document deleted successfully")

app.py CHANGED Viewed

@@ -30,3 +30,5 @@ if st.button("Get Summary"):
                 st.write(response.json())
     else:
         st.warning("Please upload a file and enter a question.")

                 st.write(response.json())
     else:
         st.warning("Please upload a file and enter a question.")

test.py DELETED Viewed

@@ -1,54 +0,0 @@
-from langchain_google_genai import ChatGoogleGenerativeAI
-from dotenv import load_dotenv
-import os
-from langchain_community.document_loaders import PyPDFLoader
-from langchain import hub
-from langchain_chroma import Chroma
-from langchain_community.document_loaders import WebBaseLoader
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables import RunnablePassthrough
-from langchain_openai import OpenAIEmbeddings
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_core.prompts import PromptTemplate
-from apis.v1.utils.prompts import rag_prompt
-load_dotenv()
-mxbai_embedder = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
-llm = ChatGoogleGenerativeAI(google_api_key=os.environ.get("GOOGLE_API_KEY"),
-                                   model="gemini-1.5-pro-latest")
-# Load and split the PDF document into pages
-pdf_loader = PyPDFLoader("14014749.pdf")
-# print(pdf_loader)
-pages = pdf_loader.load_and_split()
-# print(pages)
-# Split the pages into smaller chunks
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-splits = text_splitter.split_documents(pages)
-print(splits)
-# Create a vector store from the document splits
-vectorstore = Chroma.from_documents(documents=splits, embedding=mxbai_embedder)
-# Retrieve and generate using the relevant snippets of the blog
-retriever = vectorstore.as_retriever()
-print(retriever)
-custom_rag_prompt = PromptTemplate.from_template(rag_prompt)
-print(custom_rag_prompt)
-def format_docs(docs):
-    return "\n\n".join(doc.page_content for doc in docs)
-# Define the RAG chain
-rag_chain = (
-    {"context": retriever | format_docs, "question": RunnablePassthrough()}
-    | custom_rag_prompt
-    | llm
-    | StrOutputParser()
-)
-# Invoke the RAG chain with a question
-response = rag_chain.invoke("Can you summarize the document?")
-print(response)