Spaces:
Sleeping
Sleeping
Commit ·
72bf066
1
Parent(s): 61861c5
chore: add file requirements, streamlit app, use Chroma vectordb
Browse files
- apis/v1/controllers/rag_controller.py +6 -5
- apis/v1/controllers/vectorstore_controller.py +5 -5
- apis/v1/routes/rag.py +2 -2
- app.py +32 -0
- requirements.txt +0 -0
apis/v1/controllers/rag_controller.py
CHANGED
|
@@ -14,7 +14,7 @@ def format_docs(docs):
|
|
| 14 |
return "\n\n".join(doc.page_content for doc in docs)
|
| 15 |
|
| 16 |
def predict(file_path: str, question: str) -> str:
|
| 17 |
-
docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=mxbai_embedder)
|
| 18 |
|
| 19 |
# Load and split the PDF document into pages
|
| 20 |
pdf_loader = PyPDFLoader(file_path)
|
|
@@ -23,10 +23,11 @@ def predict(file_path: str, question: str) -> str:
|
|
| 23 |
# Split the pages into smaller chunks
|
| 24 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
| 25 |
splits = text_splitter.split_documents(pages)
|
| 26 |
-
|
| 27 |
# Retrieve and generate using the relevant snippets of the document
|
| 28 |
-
retriever = create_vector_store(splits, docsearch)
|
| 29 |
-
|
|
|
|
| 30 |
custom_rag_prompt = PromptTemplate.from_template(rag_prompt)
|
| 31 |
|
| 32 |
# Define the RAG chain
|
|
@@ -39,5 +40,5 @@ def predict(file_path: str, question: str) -> str:
|
|
| 39 |
|
| 40 |
# Invoke the RAG chain with a question
|
| 41 |
response = rag_chain.invoke(question)
|
| 42 |
-
print(response)
|
| 43 |
return response
|
|
|
|
| 14 |
return "\n\n".join(doc.page_content for doc in docs)
|
| 15 |
|
| 16 |
def predict(file_path: str, question: str) -> str:
|
| 17 |
+
# docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=mxbai_embedder)
|
| 18 |
|
| 19 |
# Load and split the PDF document into pages
|
| 20 |
pdf_loader = PyPDFLoader(file_path)
|
|
|
|
| 23 |
# Split the pages into smaller chunks
|
| 24 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
| 25 |
splits = text_splitter.split_documents(pages)
|
| 26 |
+
# print("\nSplits\n",splits)
|
| 27 |
# Retrieve and generate using the relevant snippets of the document
|
| 28 |
+
# retriever = create_vector_store(splits, docsearch)
|
| 29 |
+
retriever = create_vector_store(splits)
|
| 30 |
+
# print("\nretriever created\n", retriever)
|
| 31 |
custom_rag_prompt = PromptTemplate.from_template(rag_prompt)
|
| 32 |
|
| 33 |
# Define the RAG chain
|
|
|
|
| 40 |
|
| 41 |
# Invoke the RAG chain with a question
|
| 42 |
response = rag_chain.invoke(question)
|
| 43 |
+
# print("Response",response)
|
| 44 |
return response
|
apis/v1/controllers/vectorstore_controller.py
CHANGED
|
@@ -2,12 +2,12 @@ from langchain_chroma import Chroma
|
|
| 2 |
from ..configs.word_embedding_config import mxbai_embedder
|
| 3 |
from ..providers import vectorstore_db
|
| 4 |
|
| 5 |
-
def create_vector_store(split_docs, docsearch):
|
| 6 |
# Create a vector store from the document splits
|
| 7 |
-
|
| 8 |
# Upload the documents to the vector store
|
| 9 |
-
vectorstore_db.upload_documents(split_docs, mxbai_embedder)
|
| 10 |
# Retrieve and generate using the relevant snippets of the blog
|
| 11 |
-
|
| 12 |
-
retriever = docsearch.as_retriever()
|
| 13 |
return retriever
|
|
|
|
| 2 |
from ..configs.word_embedding_config import mxbai_embedder
|
| 3 |
from ..providers import vectorstore_db
|
| 4 |
|
| 5 |
+
def create_vector_store(split_docs):
|
| 6 |
# Create a vector store from the document splits
|
| 7 |
+
vectorstore = Chroma.from_documents(documents=split_docs, embedding=mxbai_embedder)
|
| 8 |
# Upload the documents to the vector store
|
| 9 |
+
# vectorstore_db.upload_documents(split_docs, mxbai_embedder)
|
| 10 |
# Retrieve and generate using the relevant snippets of the blog
|
| 11 |
+
retriever = vectorstore.as_retriever()
|
| 12 |
+
# retriever = docsearch.as_retriever()
|
| 13 |
return retriever
|
apis/v1/routes/rag.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
from typing import Annotated
|
| 2 |
import tempfile
|
| 3 |
import os
|
| 4 |
-
from fastapi import UploadFile,APIRouter, Depends, BackgroundTasks
|
| 5 |
from ..interfaces.rag_interface import RagResponseInterface
|
| 6 |
from ..controllers.rag_controller import predict
|
| 7 |
from ..utils.response_fmt import jsonResponseFmt
|
| 8 |
router = APIRouter(prefix="/rag", tags=["Rag"])
|
| 9 |
|
| 10 |
@router.post("/upload", response_model=RagResponseInterface)
|
| 11 |
-
async def get_rag(doc: UploadFile, question: str):
|
| 12 |
"""
|
| 13 |
Get response from RAG
|
| 14 |
"""
|
|
|
|
| 1 |
from typing import Annotated
|
| 2 |
import tempfile
|
| 3 |
import os
|
| 4 |
+
from fastapi import UploadFile,APIRouter, Depends, BackgroundTasks, Form
|
| 5 |
from ..interfaces.rag_interface import RagResponseInterface
|
| 6 |
from ..controllers.rag_controller import predict
|
| 7 |
from ..utils.response_fmt import jsonResponseFmt
|
| 8 |
router = APIRouter(prefix="/rag", tags=["Rag"])
|
| 9 |
|
| 10 |
@router.post("/upload", response_model=RagResponseInterface)
|
| 11 |
+
async def get_rag(doc: UploadFile, question: str= Form(...)):
|
| 12 |
"""
|
| 13 |
Get response from RAG
|
| 14 |
"""
|
app.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import requests
|
| 4 |
+
|
| 5 |
+
# Define the FastAPI endpoint URL
|
| 6 |
+
FASTAPI_URL = "http://localhost:7860/api/v1/rag/upload"
|
| 7 |
+
|
| 8 |
+
st.title("Document Summarizer")
|
| 9 |
+
|
| 10 |
+
# File uploader
|
| 11 |
+
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
|
| 12 |
+
|
| 13 |
+
# Text input for the question
|
| 14 |
+
question = st.text_input("Enter your question")
|
| 15 |
+
|
| 16 |
+
# Button to submit the file and question
|
| 17 |
+
if st.button("Get Summary"):
|
| 18 |
+
if uploaded_file and question:
|
| 19 |
+
# Use a form to submit the file and question
|
| 20 |
+
with st.spinner('Processing...'):
|
| 21 |
+
files = {"doc": uploaded_file.getvalue()}
|
| 22 |
+
response = requests.post(FASTAPI_URL, files={"doc": uploaded_file}, data={"question": question})
|
| 23 |
+
|
| 24 |
+
if response.status_code == 200:
|
| 25 |
+
result = response.json()
|
| 26 |
+
st.success("Response received successfully!")
|
| 27 |
+
st.write(result["data"])
|
| 28 |
+
else:
|
| 29 |
+
st.error(f"Error: {response.status_code}")
|
| 30 |
+
st.write(response.json())
|
| 31 |
+
else:
|
| 32 |
+
st.warning("Please upload a file and enter a question.")
|
requirements.txt
ADDED
|
Binary file (6.17 kB). View file
|
|
|