Spaces:

viboognesh
/

fastapiBackend

Runtime error

App Files Files Community

viboognesh committed on Mar 23, 2024

Commit

d25a986

verified ·

1 Parent(s): 94dc18e

Update main.py

Browse files

Files changed (1) hide show

main.py +117 -84

main.py CHANGED Viewed

@@ -1,17 +1,16 @@
 from fastapi import FastAPI, File, UploadFile, Depends
 from fastapi.middleware.cors import CORSMiddleware
-from typing import List
-import os
-import aiofiles
-import uuid
-import shutil
 from dotenv import load_dotenv
-from langchain_community.document_loaders import TextLoader, Docx2txtLoader, PyPDFLoader
 from langchain.prompts import ChatPromptTemplate, PromptTemplate
 from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
-from langchain_community.document_loaders.csv_loader import CSVLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.memory import ConversationBufferMemory
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
@@ -20,37 +19,97 @@ from langchain.chains import ConversationalRetrievalChain
 load_dotenv()
-app = FastAPI()
-origins = ["https://viboognesh-react-chat.static.hf.space"]
-# origins = ["http://localhost:3000"]
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["GET", "POST"],
-    allow_headers=["*"],
-)
-class ConversationChainManager:
-    _instance = None
-    def __new__(cls, *args, **kwargs):
-        if not cls._instance:
-            cls._instance = super(ConversationChainManager, cls).__new__(
-                cls, *args, **kwargs
             )
-        return cls._instance
-    def __init__(self):
-        self.conversation_chain = None
         self.llm_model = ChatOpenAI()
         self.embeddings = OpenAIEmbeddings()
-    def create_conversational_chain(self, file_paths: List[str]):
-        docs = self.get_docs(file_paths)
         memory = ConversationBufferMemory(
             memory_key="chat_history", return_messages=True
         )
@@ -59,7 +118,7 @@ class ConversationChainManager:
             self.embeddings,
         )
         retriever = vectordb.as_retriever()
-        self.conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=self.llm_model,
             retriever=retriever,
             condense_question_prompt=self.get_question_generator_prompt(),
@@ -70,39 +129,10 @@ class ConversationChainManager:
             memory=memory,
         )
-    @staticmethod
-    def get_docs(file_paths: List[str]) -> List:
-        docs = []
-        for file_path in file_paths:
-            if file_path.endswith(".txt"):
-                loader = TextLoader(file_path)
-                document = loader.load()
-                splitter = RecursiveCharacterTextSplitter(
-                    chunk_size=1000, chunk_overlap=100
-                )
-                txt_documents = splitter.split_documents(document)
-                docs.extend(txt_documents)
-            elif file_path.endswith(".csv"):
-                loader = CSVLoader(file_path)
-                csv_documents = loader.load()
-                docs.extend(csv_documents)
-            elif file_path.endswith(".docx"):
-                loader = Docx2txtLoader(file_path)
-                document = loader.load()
-                splitter = RecursiveCharacterTextSplitter(
-                    chunk_size=1000, chunk_overlap=100
-                )
-                docx_documents = splitter.split_documents(document)
-                docs.extend(docx_documents)
-            elif file_path.endswith(".pdf"):
-                loader = PyPDFLoader(file_path)
-                pdf_documents = loader.load_and_split()
-                docs.extend(pdf_documents)
-            os.remove(file_path)
-        return docs
     @staticmethod
-    def get_document_prompt() -> PromptTemplate:
         document_template = """Document Content:{page_content}
     Document Path: {source}"""
         return PromptTemplate(
@@ -111,7 +141,7 @@ class ConversationChainManager:
         )
     @staticmethod
-    def get_question_generator_prompt() -> PromptTemplate:
         question_generator_template = """Combine the chat history and follow up question into
     a standalone question.\n Chat History: {chat_history}\n
     Follow up question: {question}
@@ -119,7 +149,7 @@ class ConversationChainManager:
         return PromptTemplate.from_template(question_generator_template)
     @staticmethod
-    def get_final_prompt() -> ChatPromptTemplate:
         final_prompt_template = """Answer question based on the context and chat_history.
     If you cannot find answers, ask more related questions from the user.
     Use only the basename of the file path as name of the documents.
@@ -145,27 +175,33 @@ class ConversationChainManager:
         return ChatPromptTemplate.from_messages(messages)
-app.state.conversational_chain_manager = ConversationChainManager()
 @app.post("/upload_files/")
-async def upload_files(
-    files: List[UploadFile] = File(...),
-    conversation_chain_manager: ConversationChainManager = Depends(
-        lambda: app.state.conversational_chain_manager
-    ),
-):
-    session_folder = f"uploads"
-    os.makedirs(session_folder, exist_ok=True)
-    file_paths = []
     for file in files:
-        file_path = f"{session_folder}/{file.filename}"
-        async with aiofiles.open(file_path, "wb") as out_file:
-            content = await file.read()
-            await out_file.write(content)
-        file_paths.append(file_path)
-    conversation_chain_manager.create_conversational_chain(file_paths)
     print("conversational_chain_manager created")
     return {"message": "ConversationalRetrievalChain is created. Please ask questions."}
@@ -173,16 +209,13 @@ async def upload_files(
 @app.get("/predict/")
 async def predict(
     query: str,
-    conversation_chain_manager: ConversationChainManager = Depends(
-        lambda: app.state.conversational_chain_manager
-    ),
 ):
-    if conversation_chain_manager.conversation_chain is None:
         system_prompt = "Answer the question and also ask the user to upload files to ask questions from the files.\n"
-        response = conversation_chain_manager.llm_model.invoke(system_prompt + query)
         answer = response.content
     else:
-        response = conversation_chain_manager.conversation_chain.invoke(query)
         answer = response["answer"]
     print("predict called")

 from fastapi import FastAPI, File, UploadFile, Depends
 from fastapi.middleware.cors import CORSMiddleware
+from typing import List, Dict, Any
+from io import BytesIO, StringIO
+from docx import Document
+from langchain.docstore.document import Document as langchain_Document
+from PyPDF2 import PdfReader
+import csv
 from dotenv import load_dotenv
 from langchain.prompts import ChatPromptTemplate, PromptTemplate
 from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.memory import ConversationBufferMemory
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 load_dotenv()
class Document_Processor:
    """Convert uploaded, in-memory files into LangChain documents.

    Every entry of ``file_details`` is a dict with two keys: ``"name"`` (the
    uploaded file name, used to choose a parser and stored as the document
    ``source`` metadata) and ``"content"`` (the raw bytes of the file).
    Supported extensions are ``.txt``, ``.csv``, ``.docx`` and ``.pdf``;
    files with any other extension are skipped.
    """

    # Chunking applied to the free-text formats (txt, pdf, docx).
    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 100

    def __init__(self, file_details: List[Dict[str, Any]]):
        self.file_details = file_details

    def get_docs(self) -> "List[langchain_Document]":
        """Parse every supported file and return all resulting documents."""
        parsers = {
            ".txt": self.get_txt_docs,
            ".csv": self.get_csv_docs,
            ".docx": self.get_docx_docs,
            ".pdf": self.get_pdf_docs,
        }
        docs = []
        for file_detail in self.file_details:
            # Match case-insensitively so "REPORT.PDF" is not silently skipped.
            lowered_name = file_detail["name"].lower()
            for suffix, parser in parsers.items():
                if lowered_name.endswith(suffix):
                    docs.extend(parser(file_detail))
                    break
        return docs

    @staticmethod
    def _split_text(text: str, source: str) -> "List[langchain_Document]":
        """Chunk ``text`` and tag every chunk with ``source`` as metadata."""
        # create_documents is an instance method, so a configured splitter
        # has to be built before it can be called.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=Document_Processor.CHUNK_SIZE,
            chunk_overlap=Document_Processor.CHUNK_OVERLAP,
        )
        return splitter.create_documents([text], metadatas=[{"source": source}])

    # The parsers below are plain methods: get_docs calls them through
    # ``self``, which fails for a staticmethod that also declares ``self``.

    def get_txt_docs(self, file_detail: Dict[str, Any]) -> "List[langchain_Document]":
        """Decode a text file and split it into chunks."""
        # "utf-8-sig" decodes plain UTF-8 and drops a leading BOM if present.
        text = file_detail["content"].decode("utf-8-sig")
        return Document_Processor._split_text(text, file_detail["name"])

    def get_csv_docs(self, file_detail: Dict[str, Any]) -> "List[langchain_Document]":
        """Return one document per CSV row, formatted as ``key: value`` lines."""
        source = file_detail["name"]
        # Dropping the BOM keeps it out of the first column name.
        csv_string = file_detail["content"].decode("utf-8-sig")
        # newline="" leaves line-ending handling to the csv module, which
        # matters for quoted fields that contain newlines.
        csv_reader = csv.DictReader(StringIO(csv_string, newline=""))
        return [
            langchain_Document(
                page_content="".join(
                    f"{key}: {value}\n" for key, value in row.items()
                ),
                metadata={"source": source},
            )
            for row in csv_reader
        ]

    def get_pdf_docs(self, file_detail: Dict[str, Any]) -> "List[langchain_Document]":
        """Extract the text of every PDF page and split it into chunks."""
        reader = PdfReader(BytesIO(file_detail["content"]))
        # Guard against a page yielding no text object at all.
        pdf_text = "".join(
            f"{page.extract_text() or ''}\n" for page in reader.pages
        )
        return Document_Processor._split_text(pdf_text, file_detail["name"])

    def get_docx_docs(self, file_detail: Dict[str, Any]) -> "List[langchain_Document]":
        """Join the paragraphs of a .docx file and split them into chunks."""
        document = Document(BytesIO(file_detail["content"]))
        docx_text = " ".join(paragraph.text for paragraph in document.paragraphs)
        return Document_Processor._split_text(docx_text, file_detail["name"])
+class Conversational_Chain:
+    def __init__(self, file_details: List[Dict[Any, str]]):
         self.llm_model = ChatOpenAI()
         self.embeddings = OpenAIEmbeddings()
+        self.file_details = file_details
+    def create_conversational_chain(self):
+        docs = Document_Processor(self.file_details).get_docs()
         memory = ConversationBufferMemory(
             memory_key="chat_history", return_messages=True
         )
             self.embeddings,
         )
         retriever = vectordb.as_retriever()
+        conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=self.llm_model,
             retriever=retriever,
             condense_question_prompt=self.get_question_generator_prompt(),
             memory=memory,
         )
+        return conversation_chain
     @staticmethod
+    def get_document_prompt(self) -> PromptTemplate:
         document_template = """Document Content:{page_content}
     Document Path: {source}"""
         return PromptTemplate(
         )
     @staticmethod
+    def get_question_generator_prompt(self) -> PromptTemplate:
         question_generator_template = """Combine the chat history and follow up question into
     a standalone question.\n Chat History: {chat_history}\n
     Follow up question: {question}
         return PromptTemplate.from_template(question_generator_template)
     @staticmethod
+    def get_final_prompt(self) -> ChatPromptTemplate:
         final_prompt_template = """Answer question based on the context and chat_history.
     If you cannot find answers, ask more related questions from the user.
     Use only the basename of the file path as name of the documents.
         return ChatPromptTemplate.from_messages(messages)
# ASGI application object; the route handlers are registered on it.
app = FastAPI()

# Browser origins allowed to call this API. For local development the React
# dev server can be allowed instead: origins = ["http://localhost:3000"]
origins = ["https://viboognesh-react-chat.static.hf.space"]

# Cross-origin requests: GET and POST only, any request header, credentials
# permitted, and only from the origins listed above.
app.add_middleware(
    CORSMiddleware,
    allow_methods=["GET", "POST"],
    allow_headers=["*"],
    allow_credentials=True,
    allow_origins=origins,
)
# State shared with the /predict/ route: the retrieval chain built from the
# most recent upload (None until files have been uploaded), and the plain chat
# model that /predict/ falls back to while no chain exists.
app.state.conversation_chain = None
app.state.llm_model = ChatOpenAI()


@app.post("/upload_files/")
async def upload_files(files: List[UploadFile] = File(...)):
    """Build a retrieval chain from the uploaded files and store it on the app.

    Each upload is read fully into memory and passed on as a dict with its
    raw bytes under ``"content"`` and its file name under ``"name"``.

    Returns:
        A JSON-serialisable dict with a confirmation ``"message"``.
    """
    file_details = []
    for file in files:
        content = await file.read()
        file_details.append({"content": content, "name": f"{file.filename}"})

    # Must be stored under the attribute name that /predict/ checks
    # ("conversation_chain"); otherwise the uploaded chain is never used.
    app.state.conversation_chain = Conversational_Chain(
        file_details
    ).create_conversational_chain()
    print("conversational_chain_manager created")
    return {"message": "ConversationalRetrievalChain is created. Please ask questions."}
 @app.get("/predict/")
 async def predict(
     query: str,
 ):
+    if app.state.conversation_chain is None:
         system_prompt = "Answer the question and also ask the user to upload files to ask questions from the files.\n"
+        response = app.state.llm_model.invoke(system_prompt + query)
         answer = response.content
     else:
+        response = app.state.conversation_chain.invoke(query)
         answer = response["answer"]
     print("predict called")