Spaces:

mikepastor11
/

PennwickFileAnalyzer

Sleeping

App Files Files Community

mikepastor11 committed on Feb 15, 2024

Commit

56250cf

verified ·

1 Parent(s): efc9f3e

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -18

app.py CHANGED Viewed

@@ -33,7 +33,7 @@ from langchain.chains import ConversationalRetrievalChain
 #  from langchain.llms import HuggingFaceHub
 from langchain_community.llms import HuggingFaceHub
-def get_pdf_text(pdf_docs):
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
@@ -43,7 +43,7 @@ def get_pdf_text(pdf_docs):
 #  Chunk size and overlap must not exceed the model's capacity!
 #
-def get_text_chunks(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
         chunk_size=800,    #  1000
@@ -54,7 +54,7 @@ def get_text_chunks(text):
     return chunks
-def get_vectorstore(text_chunks):
     st.write('Here in vector store....', unsafe_allow_html=True)
     # embeddings = OpenAIEmbeddings()
@@ -81,7 +81,7 @@ def get_vectorstore(text_chunks):
     return vectorstore
-def get_conversation_chain(vectorstore):
     # llm = ChatOpenAI()
     #  llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
     #  google/bigbird-roberta-base     facebook/bart-large
@@ -96,13 +96,12 @@ def get_conversation_chain(vectorstore):
     )
     return conversation_chain
-def handle_userinput(user_question):
     response = st.session_state.conversation({'question': user_question})
     # response = st.session_state.conversation({'summarization': user_question})
     st.session_state.chat_history = response['chat_history']
     # st.empty()
     for i, message in enumerate(st.session_state.chat_history):
@@ -114,17 +113,14 @@ def handle_userinput(user_question):
             st.write(bot_template.replace(
                 "{{MSG}}", message.content), unsafe_allow_html=True)
 def main():
     # load_dotenv()
     #  st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=":books:")
-    im = Image.open("robot_icon.ico")
-    st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=im )
     st.write(css, unsafe_allow_html=True)
@@ -138,7 +134,7 @@ def main():
     user_question = st.text_input("Ask the Model a question about your uploaded documents:")
     if user_question:
-        handle_userinput(user_question)
     # st.write( user_template, unsafe_allow_html=True)
     # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
@@ -164,18 +160,18 @@ def main():
                 st.write("Vectorizing Files - Current Time =", global_current_time)
                 # get pdf text
-                raw_text = get_pdf_text(pdf_docs)
                 #  st.write(raw_text)
                 # # get the text chunks
-                text_chunks = get_text_chunks(raw_text)
                 # st.write(text_chunks)
                 # # create vector store
-                vectorstore = get_vectorstore(text_chunks)
                 # # create conversation chain
-                st.session_state.conversation = get_conversation_chain(vectorstore)
                 # Mission Complete!
                 global_later = datetime.now()

 #  from langchain.llms import HuggingFaceHub
 from langchain_community.llms import HuggingFaceHub
+def extract_pdf_text(pdf_docs):
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
 #  Chunk size and overlap must not exceed the model's capacity!
 #
+def extract_bitesize_pieces(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
         chunk_size=800,    #  1000
     return chunks
+def prepare_embedding_vectors(text_chunks):
     st.write('Here in vector store....', unsafe_allow_html=True)
     # embeddings = OpenAIEmbeddings()
     return vectorstore
+def prepare_conversation(vectorstore):
     # llm = ChatOpenAI()
     #  llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
     #  google/bigbird-roberta-base     facebook/bart-large
     )
     return conversation_chain
+def process_user_question(user_question):
     response = st.session_state.conversation({'question': user_question})
     # response = st.session_state.conversation({'summarization': user_question})
     st.session_state.chat_history = response['chat_history']
     # st.empty()
     for i, message in enumerate(st.session_state.chat_history):
             st.write(bot_template.replace(
                 "{{MSG}}", message.content), unsafe_allow_html=True)
+###################################################################################
 def main():
     # load_dotenv()
     #  st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=":books:")
+    # im = Image.open("robot_icon.ico")
+    # st.set_page_config(page_title="Pennwick PDF Analyzer", page_icon=im )
+    st.set_page_config(page_title="Pennwick PDF Analyzer")
     st.write(css, unsafe_allow_html=True)
     user_question = st.text_input("Ask the Model a question about your uploaded documents:")
     if user_question:
+        process_user_question(user_question)
     # st.write( user_template, unsafe_allow_html=True)
     # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
                 st.write("Vectorizing Files - Current Time =", global_current_time)
                 # get pdf text
+                raw_text = extract_pdf_text(pdf_docs)
                 #  st.write(raw_text)
                 # # get the text chunks
+                text_chunks = extract_bitesize_pieces(raw_text)
                 # st.write(text_chunks)
                 # # create vector store
+                vectorstore = prepare_embedding_vectors(text_chunks)
                 # # create conversation chain
+                st.session_state.conversation = prepare_conversation(vectorstore)
                 # Mission Complete!
                 global_later = datetime.now()