Spaces:
Sleeping
Sleeping
Commit ·
32c2637
1
Parent(s): 253280e
chore: add document route
Browse files- apis/__init__.py +2 -0
- apis/__pycache__/__init__.cpython-312.pyc +0 -0
- apis/v1/configs/llm_configs.py +2 -1
- apis/v1/controllers/document_controller.py +38 -0
- apis/v1/controllers/rag_controller.py +22 -0
- apis/v1/controllers/vectorstore_controller.py +10 -0
- apis/v1/interfaces/__pycache__/document_interface.cpython-312.pyc +0 -0
- apis/v1/interfaces/document_interface.py +5 -0
- apis/v1/interfaces/rag_interface.py +8 -0
- apis/v1/providers/{vectordb_provider.py → vectorstore_provider.py} +0 -0
- apis/v1/routes/__pycache__/documents.cpython-312.pyc +0 -0
- apis/v1/routes/documents.py +26 -1
- apis/v1/routes/rag.py +15 -0
- apis/v1/schemas/__pycache__/document_schema.cpython-312.pyc +0 -0
- apis/v1/schemas/document_schema.py +46 -3
- apis/v1/schemas/rag_schema.py +14 -0
- apis/v1/utils/__pycache__/prompts.cpython-311.pyc +0 -0
- apis/v1/utils/response_fmt.py +9 -0
- apis/v1/utils/utils.py +7 -0
apis/__init__.py
CHANGED
|
@@ -3,4 +3,6 @@ from .v1.routes.documents import router as documents_router
|
|
| 3 |
|
| 4 |
|
| 5 |
api_v1_router = APIRouter(prefix="/v1")
|
|
|
|
|
|
|
| 6 |
api_v1_router.include_router(documents_router)
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
api_v1_router = APIRouter(prefix="/v1")
|
| 6 |
+
|
| 7 |
+
# Register routes for the API
|
| 8 |
api_v1_router.include_router(documents_router)
|
apis/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (384 Bytes). View file
|
|
|
apis/v1/configs/llm_configs.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import os
|
| 2 |
from langchain_openai import ChatOpenAI
|
| 3 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 4 |
-
|
|
|
|
| 5 |
|
| 6 |
gpt_model = ChatOpenAI(api_key=os.environ.get('OPENAI_API_KEY'), temperature=0,
|
| 7 |
request_timeout=120, streaming=True, model="gpt-3.5-turbo-0125")
|
|
|
|
| 1 |
import os
|
| 2 |
from langchain_openai import ChatOpenAI
|
| 3 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
load_dotenv()
|
| 6 |
|
| 7 |
gpt_model = ChatOpenAI(api_key=os.environ.get('OPENAI_API_KEY'), temperature=0,
|
| 8 |
request_timeout=120, streaming=True, model="gpt-3.5-turbo-0125")
|
apis/v1/controllers/document_controller.py
CHANGED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import AnyStr
|
| 2 |
+
from fastapi import UploadFile, HTTPException, status, BackgroundTasks
|
| 3 |
+
import uuid
|
| 4 |
+
import time
|
| 5 |
+
from ..schemas.document_schema import DocSchema
|
| 6 |
+
|
| 7 |
+
# def get_all_docs():
|
| 8 |
+
# '''
|
| 9 |
+
# Get all the documents from the database.
|
| 10 |
+
# '''
|
| 11 |
+
# return Document.objects.all()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _upload_docs(filename: AnyStr, doc: DocSchema):
    '''
    Upload a document's content to storage and record where it was stored.

    The previous body read from ``data`` and called ``cv.update_path_url``,
    but neither ``data`` nor ``cv`` is defined in this function, so every
    call raised NameError. The ``doc`` argument is used instead.

    Args:
        filename: Name of the file; passed to ``get_content_type`` and to
            ``storage_db.upload``.
        doc: Document whose ``content`` is uploaded and whose ``path`` is
            overwritten with the stored location.

    Returns:
        None. ``doc`` is updated in place.
    '''
    # NOTE(review): get_content_type and storage_db are not imported in the
    # visible part of this module -- confirm they are in scope.
    content_type = get_content_type(filename)
    # NOTE(review): assumes doc.content is the payload the storage layer
    # expects as its first argument -- confirm against storage_db.upload.
    path, _url = storage_db.upload(doc.content, filename, content_type)
    doc.path = path
    # TODO(review): DocSchema has no url attribute, so the returned URL is
    # dropped here; decide where it should be stored.
    return
|
| 24 |
+
|
| 25 |
+
# def update_docs():
|
| 26 |
+
# '''
|
| 27 |
+
# Update a document in the database.
|
| 28 |
+
# '''
|
| 29 |
+
# return Document.objects.update()
|
| 30 |
+
|
| 31 |
+
# def delete_docs():
|
| 32 |
+
# '''
|
| 33 |
+
# Delete a document from the database.
|
| 34 |
+
# '''
|
| 35 |
+
# return Document.objects.delete()
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
async def upload_doc():
    '''
    Placeholder for the document upload handler.

    The definition previously had no colon and no body, which is a syntax
    error that stops the whole module from importing. It is now a valid
    stub that fails loudly until it is implemented.

    Raises:
        NotImplementedError: always.
    '''
    raise NotImplementedError("upload_doc is not implemented yet")
|
apis/v1/controllers/rag_controller.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 2 |
+
from langchain_core.runnables import RunnablePassthrough
|
| 3 |
+
from langchain_core.prompts import PromptTemplate
|
| 4 |
+
from ..utils.prompts import rag_prompt
|
| 5 |
+
from ..configs.llm_configs import gemini_model as llm
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def format_docs(docs):
    """Join the ``page_content`` of every item in *docs*, separated by a blank line."""
    contents = [item.page_content for item in docs]
    return "\n\n".join(contents)
|
| 11 |
+
|
| 12 |
+
def invoke(retriever, question: str) -> str:
    """
    Run the RAG chain for a single question and return its parsed output.

    Args:
        retriever: Piped into ``format_docs`` to produce the prompt's
            ``context`` value.
        question: Passed through unchanged as the prompt's ``question`` value
            and used as the chain input.

    Returns:
        The result of invoking the chain, after ``StrOutputParser``.
    """
    template = PromptTemplate.from_template(rag_prompt)

    # First stage: build the two prompt variables from the incoming question.
    chain_inputs = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }

    pipeline = chain_inputs | template | llm | StrOutputParser()
    return pipeline.invoke(question)
|
apis/v1/controllers/vectorstore_controller.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_chroma import Chroma
|
| 2 |
+
from ..configs.word_embedding_config import mxbai_embedder
|
| 3 |
+
|
| 4 |
+
def create_vector_store(split_docs):
    """
    Index *split_docs* in a Chroma vector store and return a retriever over it.

    Args:
        split_docs: Document splits to embed with ``mxbai_embedder``.

    Returns:
        The retriever produced by the new store's ``as_retriever()``.
    """
    store = Chroma.from_documents(documents=split_docs, embedding=mxbai_embedder)
    return store.as_retriever()
|
apis/v1/interfaces/__pycache__/document_interface.cpython-312.pyc
ADDED
|
Binary file (163 Bytes). View file
|
|
|
apis/v1/interfaces/document_interface.py
CHANGED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# from typing import List, Dict
|
| 2 |
+
# from fastapi import UploadFile, File
|
| 3 |
+
# from pydantic import BaseModel, Field
|
| 4 |
+
# from ..schemas.document_schema import DocsModel
|
| 5 |
+
|
apis/v1/interfaces/rag_interface.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict
|
| 2 |
+
from fastapi import UploadFile, File
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
from ..schemas.rag_schema import RAGModel
|
| 5 |
+
|
| 6 |
+
# Response envelope for the RAG endpoints: a message plus a list of RAGModel items.
class RagResponseInterface(BaseModel):
    # Required message describing the outcome of the request.
    msg: str = Field(..., description="Message response")
    # Required list of RAG records returned to the client.
    data: List[RAGModel] = Field(..., description="")
|
apis/v1/providers/{vectordb_provider.py → vectorstore_provider.py}
RENAMED
|
File without changes
|
apis/v1/routes/__pycache__/documents.cpython-312.pyc
ADDED
|
Binary file (1.67 kB). View file
|
|
|
apis/v1/routes/documents.py
CHANGED
|
@@ -4,10 +4,35 @@ from fastapi import APIRouter, Depends, BackgroundTasks
|
|
| 4 |
from ..interfaces.document_interface import DocumentUploadResponseInterface
|
| 5 |
router = APIRouter(prefix="/documents", tags=["Documents"])
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
@router.post("/upload", response_model=DocumentResponse)
|
| 8 |
async def upload_document(file: Annotated[BytesIO, Field(..., description="File to upload")], background_tasks: BackgroundTasks):
|
| 9 |
"""
|
| 10 |
Upload a document
|
| 11 |
"""
|
| 12 |
|
| 13 |
-
return DocumentUploadResponseInterface(file=file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from ..interfaces.document_interface import DocumentUploadResponseInterface
|
| 5 |
router = APIRouter(prefix="/documents", tags=["Documents"])
|
| 6 |
|
| 7 |
+
|
| 8 |
+
@router.get("/{document_id}", response_model=DocumentResponse)
async def get_document(document_id: str):
    """
    Get a document
    """
    # Stub: echoes the requested ID back without looking anything up.
    # NOTE(review): DocumentResponse is not imported in the visible part of
    # this module -- confirm it is in scope.
    payload = {"document_id": document_id}
    return payload
|
| 15 |
+
|
| 16 |
@router.post("/upload", response_model=DocumentResponse)
async def upload_document(file: Annotated[BytesIO, Field(..., description="File to upload")], background_tasks: BackgroundTasks):
    """
    Upload a document
    """
    # Wraps the received file in the upload response object; background_tasks
    # is accepted but not used in this body.
    # NOTE(review): FastAPI uploads are normally declared as UploadFile/File
    # rather than BytesIO -- confirm this parameter declaration works.
    response = DocumentUploadResponseInterface(file=file)
    return response
|
| 23 |
+
|
| 24 |
+
# APIRouter has no ``update`` decorator, so the old ``@router.update`` raised
# AttributeError at import time. PUT is the HTTP method for updating a resource.
@router.put("/{document_id}", response_model=DocumentResponse)
async def update_document(document_id: str):
    """
    Update a document
    """
    # Stub: echoes the requested ID back without modifying anything.
    return {"document_id": document_id}
|
| 31 |
+
|
| 32 |
+
@router.delete("/{document_id}")
async def delete_document(document_id: str):
    """
    Delete a document
    """
    # Stub: echoes the requested ID back without deleting anything.
    payload = {"document_id": document_id}
    return payload
|
apis/v1/routes/rag.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Annotated
|
| 2 |
+
from io import BytesIO
|
| 3 |
+
from fastapi import APIRouter, Depends, BackgroundTasks
|
| 4 |
+
from ..interfaces.rag_interface import RagResponseInterface
|
| 5 |
+
from ..controllers.rag_controller import invoke
|
| 6 |
+
|
| 7 |
+
router = APIRouter(prefix="/rag", tags=["Rag"])
|
| 8 |
+
|
| 9 |
+
@router.post("/", response_model=RagResponseInterface)
async def get_rag(doc_id: str, question: str):
    """
    Get response from RAG
    """
    # NOTE(review): invoke() names its first parameter ``retriever`` and pipes
    # it into a chain, but a document ID string is passed here -- confirm how
    # doc_id is meant to be resolved to a retriever.
    # NOTE(review): invoke() is annotated as returning str, while the route
    # declares RagResponseInterface as its response model -- confirm.
    return invoke(doc_id, question)
|
apis/v1/schemas/__pycache__/document_schema.cpython-312.pyc
ADDED
|
Binary file (883 Bytes). View file
|
|
|
apis/v1/schemas/document_schema.py
CHANGED
|
@@ -1,7 +1,50 @@
|
|
| 1 |
from typing import AnyStr, Dict
|
| 2 |
from pydantic import BaseModel, Field
|
| 3 |
-
from ..providers import jd_db
|
|
|
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
class
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from typing import AnyStr, Dict
|
| 2 |
from pydantic import BaseModel, Field
|
| 3 |
+
# from ..providers import jd_db
|
| 4 |
+
from ..utils.utils import get_current_time
|
| 5 |
|
| 6 |
+
# Pydantic model of a stored document; every field is a required string.
class DocModel(BaseModel):
    # Identifier of the document.
    id: str = Field(..., description="Document ID")
    # Name of the document.
    name: str = Field(..., description="Document Name")
    # Path of the document.
    path: str = Field(..., description="Document Path")
    # Content of the document.
    content: str = Field(..., description="Document Content")
    # Upload time, kept as a string.
    upload_at: str = Field(..., description="Document Uploaded At")
|
| 12 |
|
| 13 |
+
class DocSchema:
    '''
    Plain in-memory representation of a document, convertible to and from
    a dictionary.
    '''

    def __init__(
        self,
        doc_id: AnyStr = None,
        name: AnyStr = "",
        path: AnyStr = "",
        content: AnyStr = "",
        upload_at: AnyStr = None
    ):
        '''
        Args:
            doc_id: Document identifier, or None when not assigned yet.
            name: Document name.
            path: Document path.
            content: Document content.
            upload_at: Upload timestamp string. When None, the current time
                from ``get_current_time()`` is used.
        '''
        self.id = doc_id
        self.name = name
        self.path = path
        self.content = content
        # The timestamp is resolved per instance. A call in the signature
        # default would run only once, when the class is defined, and every
        # document created afterwards would share that stale value.
        self.upload_at = get_current_time() if upload_at is None else upload_at

    def to_dict(self, include_id = True):
        '''
        Return the document as a dict.

        Args:
            include_id: When true, the result also carries the "id" key.
        '''
        data_dict = {
            "name": self.name,
            "path": self.path,
            "content": self.content,
            "upload_at": self.upload_at
        }
        if include_id:
            data_dict["id"] = self.id
        return data_dict

    @staticmethod
    def from_dict(data: Dict):
        '''
        Build a DocSchema from a dict with the keys produced by ``to_dict``.

        Missing keys are read as None; a missing "upload_at" therefore
        resolves to the current time.
        '''
        return DocSchema(
            doc_id=data.get("id"),
            name=data.get("name"),
            path=data.get("path"),
            content=data.get("content"),
            upload_at=data.get("upload_at")
        )
|
apis/v1/schemas/rag_schema.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import AnyStr, Dict
|
| 2 |
+
import enum
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
# from ..providers import rag_db
|
| 5 |
+
# from ..providers import storage_db
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Pydantic model of one RAG record: a document plus the questions asked about it.
class RAGModel(BaseModel):
    # Identifier of the document; defaults to None.
    doc_id: str = Field(None, title="Doc ID")
    # Path of the document; defaults to an empty string.
    path: str = Field("", title="Doc Path")
    # Content of the document; defaults to an empty string.
    content: str = Field("", title="Doc Content")
    # The default used to be the string "" on a list-typed field. A factory
    # gives each instance its own empty list of the declared type.
    question: list = Field(default_factory=list, title="List of questions about that document")
|
| 14 |
+
|
apis/v1/utils/__pycache__/prompts.cpython-311.pyc
DELETED
|
Binary file (481 Bytes)
|
|
|
apis/v1/utils/response_fmt.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
from fastapi.responses import JSONResponse
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def jsonResponseFmt(data: Any, msg: str = "Success", code: int = 200, **kwargs):
    """
    Wrap *data* in the standard ``{"msg": ..., "data": ...}`` JSON envelope.

    Args:
        data: Payload placed under the "data" key.
        msg: Message placed under the "msg" key.
        code: Passed to JSONResponse as its second positional argument.
        **kwargs: Forwarded unchanged to JSONResponse.

    Returns:
        The constructed JSONResponse.
    """
    envelope = {"msg": msg, "data": data}
    return JSONResponse(envelope, code, **kwargs)
|
apis/v1/utils/utils.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datetime
|
| 2 |
+
|
| 3 |
+
def get_current_time() -> str:
    '''
    Return the current local date and time as an ISO 8601 string.
    '''
    moment = datetime.datetime.now()
    return moment.isoformat()
|