duythduong committed on
Commit
72bf066
·
1 Parent(s): 61861c5

chore: add file requirements, streamlit app, use Chroma vectordb

Browse files
apis/v1/controllers/rag_controller.py CHANGED
@@ -14,7 +14,7 @@ def format_docs(docs):
14
  return "\n\n".join(doc.page_content for doc in docs)
15
 
16
  def predict(file_path: str, question: str) -> str:
17
- docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=mxbai_embedder)
18
 
19
  # Load and split the PDF document into pages
20
  pdf_loader = PyPDFLoader(file_path)
@@ -23,10 +23,11 @@ def predict(file_path: str, question: str) -> str:
23
  # Split the pages into smaller chunks
24
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
25
  splits = text_splitter.split_documents(pages)
26
-
27
  # Retrieve and generate using the relevant snippets of the document
28
- retriever = create_vector_store(splits, docsearch)
29
-
 
30
  custom_rag_prompt = PromptTemplate.from_template(rag_prompt)
31
 
32
  # Define the RAG chain
@@ -39,5 +40,5 @@ def predict(file_path: str, question: str) -> str:
39
 
40
  # Invoke the RAG chain with a question
41
  response = rag_chain.invoke(question)
42
- print(response)
43
  return response
 
14
  return "\n\n".join(doc.page_content for doc in docs)
15
 
16
  def predict(file_path: str, question: str) -> str:
17
+ # docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=mxbai_embedder)
18
 
19
  # Load and split the PDF document into pages
20
  pdf_loader = PyPDFLoader(file_path)
 
23
  # Split the pages into smaller chunks
24
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
25
  splits = text_splitter.split_documents(pages)
26
+ # print("\nSplits\n",splits)
27
  # Retrieve and generate using the relevant snippets of the document
28
+ # retriever = create_vector_store(splits, docsearch)
29
+ retriever = create_vector_store(splits)
30
+ # print("\nretriever created\n", retriever)
31
  custom_rag_prompt = PromptTemplate.from_template(rag_prompt)
32
 
33
  # Define the RAG chain
 
40
 
41
  # Invoke the RAG chain with a question
42
  response = rag_chain.invoke(question)
43
+ # print("Response",response)
44
  return response
apis/v1/controllers/vectorstore_controller.py CHANGED
@@ -2,12 +2,12 @@ from langchain_chroma import Chroma
2
  from ..configs.word_embedding_config import mxbai_embedder
3
  from ..providers import vectorstore_db
4
 
5
- def create_vector_store(split_docs, docsearch):
6
  # Create a vector store from the document splits
7
- # vectorstore = Chroma.from_documents(documents=split_docs, embedding=mxbai_embedder)
8
  # Upload the documents to the vector store
9
- vectorstore_db.upload_documents(split_docs, mxbai_embedder)
10
  # Retrieve and generate using the relevant snippets of the blog
11
- # retriever = vectorstore.as_retriever()
12
- retriever = docsearch.as_retriever()
13
  return retriever
 
2
  from ..configs.word_embedding_config import mxbai_embedder
3
  from ..providers import vectorstore_db
4
 
5
def create_vector_store(split_docs):
    """Build an in-memory Chroma vector store from document splits.

    Args:
        split_docs: list of LangChain ``Document`` chunks (e.g. produced by
            ``RecursiveCharacterTextSplitter.split_documents``).

    Returns:
        A retriever over the embedded chunks, suitable for use in a RAG chain.
    """
    # Embed every chunk with the project's mxbai embedder and index it in Chroma.
    # NOTE(review): Chroma.from_documents builds a fresh (ephemeral) collection on
    # each call — confirm that per-request indexing is intended here.
    vectorstore = Chroma.from_documents(documents=split_docs, embedding=mxbai_embedder)

    # Expose the store through the standard retriever interface so callers can
    # fetch the chunks most relevant to a query.
    return vectorstore.as_retriever()
apis/v1/routes/rag.py CHANGED
@@ -1,14 +1,14 @@
1
  from typing import Annotated
2
  import tempfile
3
  import os
4
- from fastapi import UploadFile,APIRouter, Depends, BackgroundTasks
5
  from ..interfaces.rag_interface import RagResponseInterface
6
  from ..controllers.rag_controller import predict
7
  from ..utils.response_fmt import jsonResponseFmt
8
  router = APIRouter(prefix="/rag", tags=["Rag"])
9
 
10
  @router.post("/upload", response_model=RagResponseInterface)
11
- async def get_rag(doc: UploadFile, question: str):
12
  """
13
  Get response from RAG
14
  """
 
1
  from typing import Annotated
2
  import tempfile
3
  import os
4
+ from fastapi import UploadFile,APIRouter, Depends, BackgroundTasks, Form
5
  from ..interfaces.rag_interface import RagResponseInterface
6
  from ..controllers.rag_controller import predict
7
  from ..utils.response_fmt import jsonResponseFmt
8
  router = APIRouter(prefix="/rag", tags=["Rag"])
9
 
10
  @router.post("/upload", response_model=RagResponseInterface)
11
+ async def get_rag(doc: UploadFile, question: str= Form(...)):
12
  """
13
  Get response from RAG
14
  """
app.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py
"""Streamlit front-end for the RAG document-summarizer API.

Lets the user upload a PDF and ask a question; forwards both to the FastAPI
``/api/v1/rag/upload`` endpoint and renders the response.
"""
import streamlit as st
import requests

# FastAPI endpoint that accepts the uploaded PDF plus the question.
FASTAPI_URL = "http://localhost:7860/api/v1/rag/upload"

st.title("Document Summarizer")

# Restrict the uploader to PDFs — the backend parses the file with a PDF loader.
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

# Free-text question sent alongside the document.
question = st.text_input("Enter your question")

if st.button("Get Summary"):
    if uploaded_file and question:
        with st.spinner('Processing...'):
            # The route reads `question` from form data, so it goes in `data=`,
            # while the file itself travels as a multipart upload in `files=`.
            response = requests.post(
                FASTAPI_URL,
                files={"doc": uploaded_file},
                data={"question": question},
            )

        if response.status_code == 200:
            result = response.json()
            st.success("Response received successfully!")
            # The API wraps its payload under the "data" key.
            st.write(result["data"])
        else:
            st.error(f"Error: {response.status_code}")
            st.write(response.json())
    else:
        st.warning("Please upload a file and enter a question.")
requirements.txt ADDED
Binary file (6.17 kB). View file