Spaces:

viboognesh
/

fastapiBackend

Runtime error

App Files Community

viboognesh committed on Mar 23, 2024

Commit

84015e9

verified ·

1 Parent(s): 5fa1281

Update main.py

Browse files

Files changed (1) hide show

main.py +27 -18

main.py CHANGED Viewed

@@ -1,13 +1,12 @@
 from fastapi import FastAPI, File, UploadFile, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from typing import List, Dict, Any
 from io import BytesIO, StringIO
 from docx import Document
 from langchain.docstore.document import Document as langchain_Document
 from PyPDF2 import PdfReader
 import csv
-from dotenv import load_dotenv
 from langchain.prompts import ChatPromptTemplate, PromptTemplate
 from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
@@ -17,31 +16,34 @@ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_community.vectorstores import Chroma
 from langchain.chains import ConversationalRetrievalChain
 load_dotenv()
 class Document_Processor:
-    def __init__(self , file_details: List[Dict[Any, str]]):
         self.file_details = file_details
     def get_docs(self) -> List[langchain_Document]:
         docs = []
         for file_detail in self.file_details:
             if file_detail["name"].endswith(".txt"):
-                docs.extend(self.get_txt_docs(file_detail))
             elif file_detail["name"].endswith(".csv"):
-                docs.extend(self.get_csv_docs(file_detail))
             elif file_detail["name"].endswith(".docx"):
-                docs.extend(self.get_docx_docs(file_detail))
             elif file_detail["name"].endswith(".pdf"):
-                docs.extend(self.get_pdf_docs(file_detail))
         return docs
     @staticmethod
-    def get_txt_docs(self, file_detail: Dict[str, Any]) -> List[langchain_Document]:
         text = file_detail["content"].decode("utf-8")
         source = file_detail["name"]
         text_splitter = RecursiveCharacterTextSplitter(
@@ -53,7 +55,7 @@ class Document_Processor:
         return text_docs
     @staticmethod
-    def get_csv_docs(self, file_detail: Dict[str, Any]) -> List[langchain_Document]:
         csv_data = file_detail["content"]
         source = file_detail["name"]
         csv_string = csv_data.decode("utf-8")
@@ -73,7 +75,7 @@ class Document_Processor:
         return csv_docs
     @staticmethod
-    def get_pdf_docs(self, file_detail: Dict[str, Any]) -> List[langchain_Document]:
         pdf_content = BytesIO(file_detail["content"])
         source = file_detail["name"]
@@ -82,27 +84,32 @@ class Document_Processor:
         for page in reader.pages:
             pdf_text += page.extract_text() + "\n"
-        pdf_docs = RecursiveCharacterTextSplitter.create_documents(
             texts=[pdf_text], metadatas=[{"source": source}]
         )
         return pdf_docs
     @staticmethod
-    def get_docx_docs(self, file_detail: Dict[str, Any]) -> List[langchain_Document]:
         docx_content = BytesIO(file_detail["content"])
         source = file_detail["name"]
         document = Document(docx_content)
         docx_text = " ".join([paragraph.text for paragraph in document.paragraphs])
-        docx_docs = RecursiveCharacterTextSplitter.create_documents(
             [docx_text], metadatas=[{"source": source}]
         )
         return docx_docs
 class Conversational_Chain:
     def __init__(self, file_details: List[Dict[Any, str]]):
         self.llm_model = ChatOpenAI()
         self.embeddings = OpenAIEmbeddings()
@@ -132,7 +139,7 @@ class Conversational_Chain:
         return conversation_chain
     @staticmethod
-    def get_document_prompt(self) -> PromptTemplate:
         document_template = """Document Content:{page_content}
     Document Path: {source}"""
         return PromptTemplate(
@@ -141,7 +148,7 @@ class Conversational_Chain:
         )
     @staticmethod
-    def get_question_generator_prompt(self) -> PromptTemplate:
         question_generator_template = """Combine the chat history and follow up question into
     a standalone question.\n Chat History: {chat_history}\n
     Follow up question: {question}
@@ -149,7 +156,7 @@ class Conversational_Chain:
         return PromptTemplate.from_template(question_generator_template)
     @staticmethod
-    def get_final_prompt(self) -> ChatPromptTemplate:
         final_prompt_template = """Answer question based on the context and chat_history.
     If you cannot find answers, ask more related questions from the user.
     Use only the basename of the file path as name of the documents.
@@ -201,7 +208,9 @@ async def upload_files(files: List[UploadFile] = File(...)):
         details = {"content": content, "name": name}
         file_details.append(details)
-    app.state.conversational_chain = Conversational_Chain(file_details).create_conversational_chain()
     print("conversational_chain_manager created")
     return {"message": "ConversationalRetrievalChain is created. Please ask questions."}

 from fastapi import FastAPI, File, UploadFile, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from typing import List, Dict, Any
 from io import BytesIO, StringIO
 from docx import Document
 from langchain.docstore.document import Document as langchain_Document
 from PyPDF2 import PdfReader
 import csv
 from langchain.prompts import ChatPromptTemplate, PromptTemplate
 from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
 from langchain_community.vectorstores import Chroma
 from langchain.chains import ConversationalRetrievalChain
+from dotenv import load_dotenv
 load_dotenv()
 class Document_Processor:
+    def __init__(self, file_details: List[Dict[Any, str]]):
         self.file_details = file_details
     def get_docs(self) -> List[langchain_Document]:
         docs = []
         for file_detail in self.file_details:
             if file_detail["name"].endswith(".txt"):
+                docs.extend(self.get_txt_docs(file_detail=file_detail))
             elif file_detail["name"].endswith(".csv"):
+                docs.extend(self.get_csv_docs(file_detail=file_detail))
             elif file_detail["name"].endswith(".docx"):
+                docs.extend(self.get_docx_docs(file_detail=file_detail))
             elif file_detail["name"].endswith(".pdf"):
+                docs.extend(self.get_pdf_docs(file_detail=file_detail))
         return docs
     @staticmethod
+    def get_txt_docs(file_detail: Dict[str, Any]) -> List[langchain_Document]:
         text = file_detail["content"].decode("utf-8")
         source = file_detail["name"]
         text_splitter = RecursiveCharacterTextSplitter(
         return text_docs
     @staticmethod
+    def get_csv_docs(file_detail: Dict[str, Any]) -> List[langchain_Document]:
         csv_data = file_detail["content"]
         source = file_detail["name"]
         csv_string = csv_data.decode("utf-8")
         return csv_docs
     @staticmethod
+    def get_pdf_docs(file_detail: Dict[str, Any]) -> List[langchain_Document]:
         pdf_content = BytesIO(file_detail["content"])
         source = file_detail["name"]
         for page in reader.pages:
             pdf_text += page.extract_text() + "\n"
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000, chunk_overlap=100
+        )
+        pdf_docs = text_splitter.create_documents(
             texts=[pdf_text], metadatas=[{"source": source}]
         )
         return pdf_docs
     @staticmethod
+    def get_docx_docs(file_detail: Dict[str, Any]) -> List[langchain_Document]:
         docx_content = BytesIO(file_detail["content"])
         source = file_detail["name"]
         document = Document(docx_content)
         docx_text = " ".join([paragraph.text for paragraph in document.paragraphs])
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000, chunk_overlap=100
+        )
+        docx_docs = text_splitter.create_documents(
             [docx_text], metadatas=[{"source": source}]
         )
         return docx_docs
 class Conversational_Chain:
     def __init__(self, file_details: List[Dict[Any, str]]):
         self.llm_model = ChatOpenAI()
         self.embeddings = OpenAIEmbeddings()
         return conversation_chain
     @staticmethod
+    def get_document_prompt() -> PromptTemplate:
         document_template = """Document Content:{page_content}
     Document Path: {source}"""
         return PromptTemplate(
         )
     @staticmethod
+    def get_question_generator_prompt() -> PromptTemplate:
         question_generator_template = """Combine the chat history and follow up question into
     a standalone question.\n Chat History: {chat_history}\n
     Follow up question: {question}
         return PromptTemplate.from_template(question_generator_template)
     @staticmethod
+    def get_final_prompt() -> ChatPromptTemplate:
         final_prompt_template = """Answer question based on the context and chat_history.
     If you cannot find answers, ask more related questions from the user.
     Use only the basename of the file path as name of the documents.
         details = {"content": content, "name": name}
         file_details.append(details)
+    app.state.conversational_chain = Conversational_Chain(
+        file_details
+    ).create_conversational_chain()
     print("conversational_chain_manager created")
     return {"message": "ConversationalRetrievalChain is created. Please ask questions."}