Spaces:

SnehaAkula
/

HR_Doc

Runtime error

App Files Files Community

SnehaAkula committed on Mar 9, 2024

Commit

6728994

verified ·

1 Parent(s): a94ad2b

Upload 2 files

Browse files

Files changed (2) hide show

app.py +127 -0
requirements.txt +107 -0

app.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import os
+import streamlit as st
+import fitz
+from PIL import Image
+import tempfile
+from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
+from langchain.chains.question_answering import load_qa_chain
+from langchain_openai import OpenAI
+from docx import Document
+import io
+# The OpenAI API key is read from the OPENAI_API_KEY environment variable
+# (on Hugging Face Spaces, define it as a Space secret). Never hard-code the
+# key in this file: the repository is public, and any key that has been
+# committed must be treated as leaked and revoked.
+if not os.environ.get("OPENAI_API_KEY"):
+    st.warning("OPENAI_API_KEY is not set; question answering will not work until it is configured.")
+# Initialize conversation history list (kept in session state so it survives Streamlit reruns)
+if "conversation_history" not in st.session_state:
+    st.session_state.conversation_history = []
+# Function to load document and perform question answering (cached)
+from docx import Document
+@st.cache_data
+def process_document(uploaded_file, query):
+    """Answer ``query`` about an uploaded PDF or DOCX file.
+
+    Args:
+        uploaded_file: The object returned by ``st.file_uploader``; its
+            ``name`` decides which loader is used.
+        query: The question to ask about the document. When it is empty or
+            blank, no LLM call is made and the answer is ``""``.
+
+    Returns:
+        A ``(answer, document_text)`` tuple. ``document_text`` is the joined
+        paragraph text for ``.docx`` files and ``None`` for PDFs. For an
+        unsupported extension an error is shown and ``("", None)`` is returned.
+    """
+    file_extension = os.path.splitext(uploaded_file.name)[1].lower()
+    if file_extension not in (".pdf", ".docx"):
+        # Reject early so no temporary file is created for unsupported input.
+        st.error("Unsupported file format. Please upload a PDF file (.pdf) or a Word document (.docx).")
+        return "", None
+    # getvalue() returns the full upload regardless of the current read
+    # position, unlike read(), which yields nothing once the stream is consumed.
+    with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
+        tmp_file.write(uploaded_file.getvalue())
+        tmp_path = tmp_file.name
+    # The file is closed (and therefore flushed) before any loader opens it.
+    try:
+        if file_extension == ".pdf":
+            loader = PyPDFLoader(tmp_path)
+            document_text = None
+        else:
+            loader = Docx2txtLoader(tmp_path)
+            document = Document(tmp_path)
+            document_text = "\n".join(paragraph.text for paragraph in document.paragraphs)
+        if not query or not query.strip():
+            # Nothing to ask: skip the (paid) LLM round trip entirely.
+            return "", document_text
+        documents = loader.load()
+        # Load QA chain
+        chain = load_qa_chain(llm=OpenAI(), verbose=True)
+        # Perform question answering
+        response = chain.invoke({"input_documents": documents, "question": query})
+        return response["output_text"], document_text
+    finally:
+        # Always remove the temporary file, including on errors.
+        os.unlink(tmp_path)
+# Function to update conversation history
+def update_conversation(query, response):
+    """Record one question/answer exchange in the session's conversation history."""
+    exchange = {"question": query, "answer": response}
+    history = st.session_state.conversation_history
+    history.append(exchange)
+# Function to convert PDF pages to images
+def pdf_to_images(pdf_bytes):
+    """Render every page of a PDF to a PIL image.
+
+    Args:
+        pdf_bytes: The raw bytes of a PDF document.
+
+    Returns:
+        A list of RGB ``PIL.Image`` objects, one per page, in page order.
+    """
+    # Open from memory with explicit keywords and close the document when done;
+    # Image.frombytes copies the pixel data, so the images outlive the document.
+    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
+        images = []
+        for page in doc:
+            # alpha=False keeps the pixmap at 3 bytes per pixel, matching "RGB".
+            pixmap = page.get_pixmap(alpha=False)
+            images.append(Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples))
+    return images
+# Streamlit UI
+def main():
+    """Render the Streamlit UI.
+
+    The sidebar holds the file uploader, the question box and the "Ask"
+    button. The main area previews the uploaded document (text for DOCX,
+    page images for PDF) and lists the conversation history.
+    """
+    # Set sidebar title
+    st.sidebar.title("7steps.AI")
+    st.sidebar.markdown("---")
+    # File uploader for document in sidebar
+    uploaded_file = st.sidebar.file_uploader("Upload a document", type=["pdf", "docx"])
+    # Display document content or images
+    if uploaded_file is not None:
+        st.title("Document Content")
+        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
+        if file_extension == ".docx":
+            # Read the text directly with python-docx; previewing must not
+            # run the question-answering chain (and bill an LLM call) with
+            # an empty question.
+            document = Document(io.BytesIO(uploaded_file.getvalue()))
+            document_text = "\n".join(paragraph.text for paragraph in document.paragraphs)
+            st.text_area("Document Text", value=document_text, height=300)
+        elif file_extension == ".pdf":
+            images = pdf_to_images(uploaded_file.getvalue())
+            if images:
+                page_number = st.number_input("Page Number", value=1, min_value=1, max_value=len(images))
+                selected_page = images[page_number - 1]
+                st.image(selected_page, caption=f"Page {page_number}", use_column_width=True)
+                # Download button for images
+                img_bytes = io.BytesIO()
+                selected_page.save(img_bytes, format='PNG')
+                st.download_button("Download Image", img_bytes.getvalue(), f'Page_{page_number}.png')
+    # Text box for new question in sidebar
+    query = st.sidebar.text_input("Enter your question:")
+    # "Ask" button in sidebar
+    if st.sidebar.button("Ask"):
+        if uploaded_file is None:
+            st.sidebar.write("Please upload a document first.")
+        elif not query.strip():
+            # Do not send blank questions to the model.
+            st.sidebar.write("Please enter a question first.")
+        else:
+            try:
+                # Process document and get the response
+                response, _ = process_document(uploaded_file, query)
+            except Exception as exc:
+                # UI boundary: report loader/API failures instead of crashing the app.
+                st.sidebar.error(f"Could not answer the question: {exc}")
+            else:
+                if response:  # Check if response is not empty
+                    # Update conversation history
+                    update_conversation(query, response)
+    # Display conversation history
+    st.title("Conversation History")
+    for item in st.session_state.conversation_history:
+        st.write("You:", item["question"])
+        st.write("AI:", item["answer"])
+# Run the application: build the UI only when this file is executed as a
+# script (as `streamlit run app.py` does), not when it is imported.
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,107 @@

+aiohttp==3.9.3
+aiosignal==1.3.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.3.0
+async-timeout==4.0.3
+attrs==23.2.0
+blinker==1.7.0
+cachetools==5.3.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+ci-info==0.3.0
+click==8.1.7
+colorama==0.4.6
+configobj==5.0.8
+configparser==6.0.1
+dataclasses-json==0.6.4
+distro==1.9.0
+docx2txt==0.8
+etelemetry==0.3.1
+exceptiongroup==1.2.0
+filelock==3.13.1
+frozenlist==1.4.1
+gitdb==4.0.11
+GitPython==3.1.42
+greenlet==3.0.3
+h11==0.14.0
+httpcore==1.0.4
+httplib2==0.22.0
+httpx==0.27.0
+idna==3.6
+importlib_resources==6.1.3
+isodate==0.6.1
+Jinja2==3.1.3
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+langchain==0.1.11
+langchain-community==0.0.27
+langchain-core==0.1.30
+langchain-openai==0.0.8
+langchain-text-splitters==0.0.1
+langsmith==0.1.23
+looseversion==1.3.0
+lxml==5.1.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.1
+mdurl==0.1.2
+multidict==6.0.5
+mypy-extensions==1.0.0
+networkx==3.1
+nibabel==5.2.1
+nipype==1.8.6
+numpy==1.24.4
+openai==1.13.3
+orjson==3.9.15
+packaging==23.2
+pandas==2.0.3
+pathlib==1.0.1
+pillow==10.2.0
+pkgutil_resolve_name==1.3.10
+protobuf==4.25.3
+prov==2.0.0
+pyarrow==15.0.1
+pydantic==2.6.3
+pydantic_core==2.16.3
+pydeck==0.8.1b0
+pydot==2.0.0
+Pygments==2.17.2
+PyMuPDF==1.23.26
+PyMuPDFb==1.23.22
+pyparsing==3.1.2
+pypdf==4.1.0
+python-dateutil==2.9.0.post0
+python-docx==1.1.0
+pytz==2024.1
+pyxnat==1.6.2
+PyYAML==6.0.1
+rdflib==7.0.0
+referencing==0.33.0
+regex==2023.12.25
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.0
+scipy==1.10.1
+simplejson==3.19.2
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+SQLAlchemy==2.0.28
+streamlit==1.32.0
+tenacity==8.2.3
+tiktoken==0.6.0
+toml==0.10.2
+toolz==0.12.1
+tornado==6.4
+tqdm==4.66.2
+traits==6.3.2
+typing-inspect==0.9.0
+typing_extensions==4.10.0
+tzdata==2024.1
+urllib3==2.2.1
+watchdog==4.0.0
+yarl==1.9.4
+zipp==3.17.0