Spaces:

duythduong
/

fpt-chat

Sleeping

App Files Files Community

duythduong committed on Jun 22, 2024

Commit

32c2637

1 Parent(s): 253280e

chore: add document route

Browse files

Files changed (19) hide show

apis/__init__.py +2 -0
apis/__pycache__/__init__.cpython-312.pyc +0 -0
apis/v1/configs/llm_configs.py +2 -1
apis/v1/controllers/document_controller.py +38 -0
apis/v1/controllers/rag_controller.py +22 -0
apis/v1/controllers/vectorstore_controller.py +10 -0
apis/v1/interfaces/__pycache__/document_interface.cpython-312.pyc +0 -0
apis/v1/interfaces/document_interface.py +5 -0
apis/v1/interfaces/rag_interface.py +8 -0
apis/v1/providers/{vectordb_provider.py → vectorstore_provider.py} +0 -0
apis/v1/routes/__pycache__/documents.cpython-312.pyc +0 -0
apis/v1/routes/documents.py +26 -1
apis/v1/routes/rag.py +15 -0
apis/v1/schemas/__pycache__/document_schema.cpython-312.pyc +0 -0
apis/v1/schemas/document_schema.py +46 -3
apis/v1/schemas/rag_schema.py +14 -0
apis/v1/utils/__pycache__/prompts.cpython-311.pyc +0 -0
apis/v1/utils/response_fmt.py +9 -0
apis/v1/utils/utils.py +7 -0

apis/__init__.py CHANGED Viewed

@@ -3,4 +3,6 @@ from .v1.routes.documents import router as documents_router
 api_v1_router = APIRouter(prefix="/v1")
 api_v1_router.include_router(documents_router)

 api_v1_router = APIRouter(prefix="/v1")
+# Mount the documents routes on the v1 router, which is created with the "/v1" prefix.
 api_v1_router.include_router(documents_router)

apis/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (384 Bytes). View file

apis/v1/configs/llm_configs.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import os
 from langchain_openai import ChatOpenAI
 from langchain_google_genai import ChatGoogleGenerativeAI
 gpt_model = ChatOpenAI(api_key=os.environ.get('OPENAI_API_KEY'), temperature=0,
                        request_timeout=120, streaming=True, model="gpt-3.5-turbo-0125")

 import os
 from langchain_openai import ChatOpenAI
 from langchain_google_genai import ChatGoogleGenerativeAI
+from dotenv import load_dotenv
+# Run load_dotenv() before OPENAI_API_KEY is read from os.environ below.
+load_dotenv()
+# OpenAI chat model: key taken from OPENAI_API_KEY, temperature 0,
+# 120-second request timeout, streaming enabled.
 gpt_model = ChatOpenAI(api_key=os.environ.get('OPENAI_API_KEY'), temperature=0,
                        request_timeout=120, streaming=True, model="gpt-3.5-turbo-0125")

apis/v1/controllers/document_controller.py CHANGED Viewed

	@@ -0,0 +1,38 @@

+from typing import AnyStr
+from fastapi import UploadFile, HTTPException, status, BackgroundTasks
+import uuid
+import time
+from ..schemas.document_schema import DocSchema
+# def get_all_docs():
+#     '''
+#     Get all the documents from the database.
+#     '''
+#     return Document.objects.all()
+def _upload_docs(filename: AnyStr, doc: DocSchema):
+    '''
+    Upload a file to storage and record the resulting path and URL.
+
+    Looks up the content type for ``filename``, passes ``data``, the file name
+    and that content type to ``storage_db.upload``, then hands the returned
+    ``(path, url)`` pair to ``cv.update_path_url``. Returns ``None``.
+
+    NOTE(review): ``get_content_type``, ``storage_db``, ``data`` and ``cv`` are
+    neither imported nor defined in this module as shown, and the ``doc``
+    parameter is never used -- confirm before calling this function.
+    '''
+    # Resolve the content type from the file name.
+    content_type = get_content_type(filename)
+    path, url = storage_db.upload(data, filename, content_type)
+    # NOTE(review): presumably this should update ``doc`` rather than ``cv`` -- verify.
+    cv.update_path_url(path, url)
+    return
+# def update_docs():
+#     '''
+#     Update a document in the database.
+#     '''
+#     return Document.objects.update()
+# def delete_docs():
+#     '''
+#     Delete a document from the database.
+#     '''
+#     return Document.objects.delete()
+async def upload_doc():
+    '''
+    Upload a document (placeholder).
+
+    The definition was committed without a colon or a body, which made the
+    whole module fail to import with a SyntaxError. It is completed here as an
+    explicit stub so the module loads and callers get a clear error.
+
+    Raises:
+        NotImplementedError: always, until the upload flow is implemented.
+    '''
+    raise NotImplementedError("upload_doc is not implemented yet")

apis/v1/controllers/rag_controller.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.prompts import PromptTemplate
+from ..utils.prompts import rag_prompt
+from ..configs.llm_configs import gemini_model as llm
+def format_docs(docs):
+    """Join the ``page_content`` of every document, separated by a blank line."""
+    contents = [doc.page_content for doc in docs]
+    return "\n\n".join(contents)
+def invoke(retriever, question:str) -> str:
+    """
+    Run the RAG chain for ``question`` and return the parsed string output.
+
+    The chain builds a prompt from ``rag_prompt``, fills "context" with the
+    output of ``retriever`` piped through ``format_docs``, passes the input
+    through unchanged as "question", then calls ``llm`` and ``StrOutputParser``.
+    """
+    prompt = PromptTemplate.from_template(rag_prompt)
+    # Both keys receive the same chain input: one via the retriever, one untouched.
+    chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
+    rag_chain = chain_inputs | prompt | llm | StrOutputParser()
+    return rag_chain.invoke(question)

apis/v1/controllers/vectorstore_controller.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from langchain_chroma import Chroma
+from ..configs.word_embedding_config import mxbai_embedder
+def create_vector_store(split_docs):
+    """
+    Build a Chroma vector store from ``split_docs`` and return a retriever over it.
+
+    The documents are embedded with ``mxbai_embedder``.
+    """
+    store = Chroma.from_documents(embedding=mxbai_embedder, documents=split_docs)
+    return store.as_retriever()

apis/v1/interfaces/__pycache__/document_interface.cpython-312.pyc ADDED Viewed

Binary file (163 Bytes). View file

apis/v1/interfaces/document_interface.py CHANGED Viewed

	@@ -0,0 +1,5 @@

+# from typing import List, Dict
+# from fastapi import UploadFile, File
+# from pydantic import BaseModel, Field
+# from ..schemas.document_schema import DocsModel

apis/v1/interfaces/rag_interface.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from typing import List, Dict
+from fastapi import UploadFile, File
+from pydantic import BaseModel, Field
+from ..schemas.rag_schema import RAGModel
+class RagResponseInterface(BaseModel):
+    # Response envelope: a message plus a list of RAGModel items.
+    # Both fields are required (declared with ``Field(...)``).
+    msg: str = Field(..., description="Message response")
+    # NOTE(review): the description for ``data`` is an empty string.
+    data: list[RAGModel] = Field(..., description="")

apis/v1/providers/{vectordb_provider.py → vectorstore_provider.py} RENAMED Viewed

File without changes

apis/v1/routes/__pycache__/documents.cpython-312.pyc ADDED Viewed

Binary file (1.67 kB). View file

apis/v1/routes/documents.py CHANGED Viewed

@@ -4,10 +4,35 @@ from fastapi import APIRouter, Depends, BackgroundTasks
 from ..interfaces.document_interface import DocumentUploadResponseInterface
 router = APIRouter(prefix="/documents", tags=["Documents"])
 @router.post("/upload", response_model=DocumentResponse)
 async def upload_document(file: Annotated[BytesIO, Field(..., description="File to upload")], background_tasks: BackgroundTasks):
     """
     Upload a document
     """
-    return DocumentUploadResponseInterface(file=file)

 from ..interfaces.document_interface import DocumentUploadResponseInterface
 router = APIRouter(prefix="/documents", tags=["Documents"])
+@router.get("/{document_id}", response_model=DocumentResponse)
+async def get_document(document_id: str):
+    """
+    Get a document.
+
+    Currently a stub: echoes the ``document_id`` path parameter back in a dict.
+    """
+    payload = {"document_id": document_id}
+    return payload
 @router.post("/upload", response_model=DocumentResponse)
 async def upload_document(file: Annotated[BytesIO, Field(..., description="File to upload")], background_tasks: BackgroundTasks):
     """
     Upload a document
+
+    Wraps ``file`` in a ``DocumentUploadResponseInterface`` and returns it;
+    ``background_tasks`` is accepted but not used in the body.
+
+    NOTE(review): the returned object is a ``DocumentUploadResponseInterface``
+    while ``response_model`` is ``DocumentResponse`` -- confirm they agree.
+    NOTE(review): ``file`` is typed as ``BytesIO`` with ``Field``; FastAPI's
+    file-upload documentation uses ``UploadFile``/``File`` -- verify.
     """
+    return DocumentUploadResponseInterface(file=file)
+@router.put("/{document_id}", response_model=DocumentResponse)
+async def update_document(document_id: str):
+    """
+    Update a document.
+
+    Registered with ``router.put``: ``APIRouter`` provides one decorator per
+    HTTP method (get, put, post, delete, patch, ...) and has no ``update``
+    decorator, so ``@router.update`` raised AttributeError at import time.
+
+    Currently a stub: echoes the ``document_id`` path parameter back in a dict.
+    """
+    return {"document_id": document_id}
+@router.delete("/{document_id}")
+async def delete_document(document_id: str):
+    """
+    Delete a document.
+
+    Currently a stub: echoes the ``document_id`` path parameter back in a dict.
+    """
+    payload = {"document_id": document_id}
+    return payload

apis/v1/routes/rag.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from typing import Annotated
+from io import BytesIO
+from fastapi import APIRouter, Depends, BackgroundTasks
+from ..interfaces.rag_interface import RagResponseInterface
+from ..controllers.rag_controller import invoke
+router = APIRouter(prefix="/rag", tags=["Rag"])
+@router.post("/", response_model=RagResponseInterface)
+async def get_rag(doc_id: str, question: str):
+    """
+    Get response from RAG.
+
+    Passes ``doc_id`` and ``question`` to ``invoke`` and returns its result
+    unchanged.
+
+    NOTE(review): ``invoke`` in rag_controller names its first parameter
+    ``retriever`` and pipes it into ``format_docs``; here it receives the
+    ``doc_id`` string -- confirm a retriever lookup is not missing.
+    NOTE(review): ``invoke`` is annotated as returning ``str`` while
+    ``response_model`` is ``RagResponseInterface`` (``msg`` and ``data``) --
+    confirm the response shape.
+    """
+    answer = invoke(doc_id, question)
+    return answer

apis/v1/schemas/__pycache__/document_schema.cpython-312.pyc ADDED Viewed

Binary file (883 Bytes). View file

apis/v1/schemas/document_schema.py CHANGED Viewed

@@ -1,7 +1,50 @@
 from typing import AnyStr, Dict
 from pydantic import BaseModel, Field
-from ..providers import jd_db
-class DocumentResponse(BaseModel):

 from typing import AnyStr, Dict
 from pydantic import BaseModel, Field
+# from ..providers import jd_db
+from ..utils.utils import get_current_time
+class DocModel(BaseModel):
+    # Pydantic model of a document; every field is a required string
+    # (each is declared with ``Field(...)``).
+    id: str = Field(..., description="Document ID")
+    name: str = Field(..., description="Document Name")
+    path: str = Field(..., description="Document Path")
+    content: str = Field(..., description="Document Content")
+    # Same key name that DocSchema.to_dict emits for the upload timestamp.
+    upload_at: str = Field(..., description="Document Uploaded At")
+class DocSchema:
+    '''
+    Schema and Validation for Document
+
+    Plain container for a document's id, name, path, content and upload
+    timestamp, with helpers to convert to and from a dict.
+    '''
+    def __init__(
+        self,
+        doc_id: AnyStr = None,
+        name: AnyStr = "",
+        path: AnyStr = "",
+        content: AnyStr = "",
+        upload_at: AnyStr = None
+    ):
+        '''
+        Store the document fields.
+
+        ``upload_at`` defaults to the current time, computed when the instance
+        is created. Passing ``None`` explicitly has the same effect.
+        '''
+        self.id = doc_id
+        self.name = name
+        self.path = path
+        self.content = content
+        # A ``get_current_time()`` default in the signature is evaluated only
+        # once, when the class is defined, so every document would share the
+        # import-time timestamp. Resolve it per instance instead.
+        self.upload_at = get_current_time() if upload_at is None else upload_at
+    def to_dict(self, include_id = True):
+        '''
+        Return the fields as a dict; the "id" key is added only when
+        ``include_id`` is true.
+        '''
+        data_dict = {
+            "name": self.name,
+            "path": self.path,
+            "content": self.content,
+            "upload_at": self.upload_at
+        }
+        if include_id:
+            data_dict["id"] = self.id
+        return data_dict
+    @staticmethod
+    def from_dict(data: Dict):
+        '''
+        Build a DocSchema from a dict with the keys produced by ``to_dict``.
+
+        Missing keys are read as ``None``; a missing "upload_at" therefore
+        resolves to the current time in ``__init__``.
+        '''
+        return DocSchema(
+            doc_id=data.get("id"),
+            name=data.get("name"),
+            path=data.get("path"),
+            content=data.get("content"),
+            upload_at=data.get("upload_at")
+        )

apis/v1/schemas/rag_schema.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from typing import AnyStr, Dict
+import enum
+from pydantic import BaseModel, Field
+# from ..providers import rag_db
+# from ..providers import storage_db
+class RAGModel(BaseModel):
+    # Pydantic model for one RAG item; every field is optional.
+    # ``doc_id`` defaults to None, so its annotation allows None as well.
+    doc_id: str | None = Field(None, title="Doc ID")
+    path: str = Field("", title="Doc Path")
+    content: str = Field("", title="Doc Content")
+    # The default was "" (a str) for a field annotated as list; pydantic does
+    # not validate defaults, so instances carried a str here. Use a fresh
+    # empty list per instance instead.
+    question: list = Field(default_factory=list, title="List of questions about that document")

apis/v1/utils/__pycache__/prompts.cpython-311.pyc DELETED Viewed

Binary file (481 Bytes)

apis/v1/utils/response_fmt.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from typing import Any
+from fastapi.responses import JSONResponse
+def jsonResponseFmt(data: Any, msg: str = "Success", code: int = 200, **kwargs):
+    """
+    Wrap ``data`` and ``msg`` in a ``JSONResponse``.
+
+    The body is ``{"msg": msg, "data": data}``; ``code`` is passed as the
+    second positional argument and ``kwargs`` are forwarded unchanged.
+    """
+    body = {"msg": msg, "data": data}
+    return JSONResponse(body, code, **kwargs)

apis/v1/utils/utils.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import datetime
+def get_current_time() -> str:
+    '''
+    Return ``datetime.datetime.now()`` formatted as an ISO 8601 string.
+    '''
+    now = datetime.datetime.now()
+    return now.isoformat()