duythduong committed on
Commit
32c2637
·
1 Parent(s): 253280e

chore: add document route

Browse files
apis/__init__.py CHANGED
@@ -3,4 +3,6 @@ from .v1.routes.documents import router as documents_router
3
 
4
 
5
  api_v1_router = APIRouter(prefix="/v1")
 
 
6
  api_v1_router.include_router(documents_router)
 
3
 
4
 
5
  api_v1_router = APIRouter(prefix="/v1")
6
+
7
+ # Register routes for the API
8
  api_v1_router.include_router(documents_router)
apis/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (384 Bytes). View file
 
apis/v1/configs/llm_configs.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
  from langchain_openai import ChatOpenAI
3
  from langchain_google_genai import ChatGoogleGenerativeAI
4
-
 
5
 
6
  gpt_model = ChatOpenAI(api_key=os.environ.get('OPENAI_API_KEY'), temperature=0,
7
  request_timeout=120, streaming=True, model="gpt-3.5-turbo-0125")
 
1
  import os
2
  from langchain_openai import ChatOpenAI
3
  from langchain_google_genai import ChatGoogleGenerativeAI
4
+ from dotenv import load_dotenv
5
+ load_dotenv()
6
 
7
  gpt_model = ChatOpenAI(api_key=os.environ.get('OPENAI_API_KEY'), temperature=0,
8
  request_timeout=120, streaming=True, model="gpt-3.5-turbo-0125")
apis/v1/controllers/document_controller.py CHANGED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import AnyStr
2
+ from fastapi import UploadFile, HTTPException, status, BackgroundTasks
3
+ import uuid
4
+ import time
5
+ from ..schemas.document_schema import DocSchema
6
+
7
+ # def get_all_docs():
8
+ # '''
9
+ # Get all the documents from the database.
10
+ # '''
11
+ # return Document.objects.all()
12
+
13
+
14
+ def _upload_docs(filename: AnyStr, doc: DocSchema):
15
+ '''
16
+ Upload a document to storage and record its resulting path and URL.
17
+ '''
18
+ # Get content type of file
19
+ content_type = get_content_type(filename)
20
+ path, url = storage_db.upload(data, filename, content_type)
21
+ cv.update_path_url(path, url)
22
+
23
+ return
24
+
25
+ # def update_docs():
26
+ # '''
27
+ # Update a document in the database.
28
+ # '''
29
+ # return Document.objects.update()
30
+
31
+ # def delete_docs():
32
+ # '''
33
+ # Delete a document from the database.
34
+ # '''
35
+ # return Document.objects.delete()
36
+
37
+
38
+ async def upload_doc()
apis/v1/controllers/rag_controller.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.output_parsers import StrOutputParser
2
+ from langchain_core.runnables import RunnablePassthrough
3
+ from langchain_core.prompts import PromptTemplate
4
+ from ..utils.prompts import rag_prompt
5
+ from ..configs.llm_configs import gemini_model as llm
6
+
7
+
8
+
9
+ def format_docs(docs):
10
+ return "\n\n".join(doc.page_content for doc in docs)
11
+
12
+ def invoke(retriever, question:str) -> str:
13
+
14
+ prompt = PromptTemplate.from_template(rag_prompt)
15
+ rag_chain = (
16
+ {"context": retriever | format_docs, "question": RunnablePassthrough()}
17
+ | prompt
18
+ | llm
19
+ | StrOutputParser()
20
+ )
21
+ response = rag_chain.invoke(question)
22
+ return response
apis/v1/controllers/vectorstore_controller.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_chroma import Chroma
2
+ from ..configs.word_embedding_config import mxbai_embedder
3
+
4
+ def create_vector_store(split_docs):
5
+ # Create a vector store from the document splits
6
+ vectorstore = Chroma.from_documents(documents=split_docs, embedding=mxbai_embedder)
7
+
8
+ # Expose the vector store as a retriever so callers can fetch relevant document snippets
9
+ retriever = vectorstore.as_retriever()
10
+ return retriever
apis/v1/interfaces/__pycache__/document_interface.cpython-312.pyc ADDED
Binary file (163 Bytes). View file
 
apis/v1/interfaces/document_interface.py CHANGED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # from typing import List, Dict
2
+ # from fastapi import UploadFile, File
3
+ # from pydantic import BaseModel, Field
4
+ # from ..schemas.document_schema import DocsModel
5
+
apis/v1/interfaces/rag_interface.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+ from fastapi import UploadFile, File
3
+ from pydantic import BaseModel, Field
4
+ from ..schemas.rag_schema import RAGModel
5
+
6
+ class RagResponseInterface(BaseModel):
7
+ msg: str = Field(..., description="Message response")
8
+ data: list[RAGModel] = Field(..., description="")
apis/v1/providers/{vectordb_provider.py → vectorstore_provider.py} RENAMED
File without changes
apis/v1/routes/__pycache__/documents.cpython-312.pyc ADDED
Binary file (1.67 kB). View file
 
apis/v1/routes/documents.py CHANGED
@@ -4,10 +4,35 @@ from fastapi import APIRouter, Depends, BackgroundTasks
4
  from ..interfaces.document_interface import DocumentUploadResponseInterface
5
  router = APIRouter(prefix="/documents", tags=["Documents"])
6
 
 
 
 
 
 
 
 
 
 
7
  @router.post("/upload", response_model=DocumentResponse)
8
  async def upload_document(file: Annotated[BytesIO, Field(..., description="File to upload")], background_tasks: BackgroundTasks):
9
  """
10
  Upload a document
11
  """
12
 
13
- return DocumentUploadResponseInterface(file=file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from ..interfaces.document_interface import DocumentUploadResponseInterface
5
  router = APIRouter(prefix="/documents", tags=["Documents"])
6
 
7
+
8
+ @router.get("/{document_id}", response_model=DocumentResponse)
9
+ async def get_document(document_id: str):
10
+ """
11
+ Get a document
12
+ """
13
+
14
+ return {"document_id": document_id}
15
+
16
  @router.post("/upload", response_model=DocumentResponse)
17
  async def upload_document(file: Annotated[BytesIO, Field(..., description="File to upload")], background_tasks: BackgroundTasks):
18
  """
19
  Upload a document
20
  """
21
 
22
+ return DocumentUploadResponseInterface(file=file)
23
+
24
+ @router.update("/{document_id}", response_model=DocumentResponse)
25
+ async def update_document(document_id: str):
26
+ """
27
+ Update a document
28
+ """
29
+
30
+ return {"document_id": document_id}
31
+
32
+ @router.delete("/{document_id}")
33
+ async def delete_document(document_id: str):
34
+ """
35
+ Delete a document
36
+ """
37
+
38
+ return {"document_id": document_id}
apis/v1/routes/rag.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Annotated
2
+ from io import BytesIO
3
+ from fastapi import APIRouter, Depends, BackgroundTasks
4
+ from ..interfaces.rag_interface import RagResponseInterface
5
+ from ..controllers.rag_controller import invoke
6
+
7
+ router = APIRouter(prefix="/rag", tags=["Rag"])
8
+
9
+ @router.post("/", response_model=RagResponseInterface)
10
+ async def get_rag(doc_id: str, question: str):
11
+ """
12
+ Get response from RAG
13
+ """
14
+ answer = invoke(doc_id, question)
15
+ return answer
apis/v1/schemas/__pycache__/document_schema.cpython-312.pyc ADDED
Binary file (883 Bytes). View file
 
apis/v1/schemas/document_schema.py CHANGED
@@ -1,7 +1,50 @@
1
  from typing import AnyStr, Dict
2
  from pydantic import BaseModel, Field
3
- from ..providers import jd_db
 
4
 
 
 
 
 
 
 
5
 
6
- class DocumentResponse(BaseModel):
7
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import AnyStr, Dict
2
  from pydantic import BaseModel, Field
3
+ # from ..providers import jd_db
4
+ from ..utils.utils import get_current_time
5
 
6
+ class DocModel(BaseModel):
7
+ id: str = Field(..., description="Document ID")
8
+ name: str = Field(..., description="Document Name")
9
+ path: str = Field(..., description="Document Path")
10
+ content: str = Field(..., description="Document Content")
11
+ upload_at: str = Field(..., description="Document Uploaded At")
12
 
13
+ class DocSchema:
14
+ '''
15
+ Schema and Validation for Document
16
+ '''
17
+ def __init__(
18
+ self,
19
+ doc_id: AnyStr = None,
20
+ name: AnyStr = "",
21
+ path: AnyStr = "",
22
+ content: AnyStr = "",
23
+ upload_at: AnyStr = get_current_time()
24
+ ):
25
+ self.id = doc_id
26
+ self.name = name
27
+ self.path = path
28
+ self.content = content
29
+ self.upload_at = upload_at
30
+
31
+ def to_dict(self, include_id = True):
32
+ data_dict = {
33
+ "name": self.name,
34
+ "path": self.path,
35
+ "content": self.content,
36
+ "upload_at": self.upload_at
37
+ }
38
+ if include_id:
39
+ data_dict["id"] = self.id
40
+ return data_dict
41
+
42
+ @staticmethod
43
+ def from_dict(data: Dict):
44
+ return DocSchema(
45
+ doc_id=data.get("id"),
46
+ name=data.get("name"),
47
+ path=data.get("path"),
48
+ content=data.get("content"),
49
+ upload_at=data.get("upload_at")
50
+ )
apis/v1/schemas/rag_schema.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import AnyStr, Dict
2
+ import enum
3
+ from pydantic import BaseModel, Field
4
+ # from ..providers import rag_db
5
+ # from ..providers import storage_db
6
+
7
+
8
+
9
+ class RAGModel(BaseModel):
10
+ doc_id: str = Field(None, title="Doc ID")
11
+ path: str = Field("", title="Doc Path")
12
+ content: str = Field("", title="Doc Content")
13
+ question: list = Field("",title="List of questions about that document")
14
+
apis/v1/utils/__pycache__/prompts.cpython-311.pyc DELETED
Binary file (481 Bytes)
 
apis/v1/utils/response_fmt.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+ from fastapi.responses import JSONResponse
3
+
4
+
5
+ def jsonResponseFmt(data: Any, msg: str = "Success", code: int = 200, **kwargs):
6
+ return JSONResponse({
7
+ "msg": msg,
8
+ "data": data
9
+ }, code, **kwargs)
apis/v1/utils/utils.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import datetime
2
+
3
+ def get_current_time() -> str:
4
+ '''
5
+ Get the current local time as an ISO 8601 formatted string.
6
+ '''
7
+ return datetime.datetime.now().isoformat()