Spaces:

rvritesh167
/

Extract_text_From_pdf

Runtime error

rvritesh167 committed on Nov 11, 2023

Commit

1f70f29

1 Parent(s): 67927c6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import streamlit as st
 from langchain.document_loaders import UnstructuredFileLoader
 def main():
     st.title("PDF Text Extractor")
@@ -8,13 +12,24 @@ def main():
     if uploaded_file is not None:
         st.subheader("PDF Content:")
-        loader = UnstructuredFileLoader(uploaded_file)
         data = loader.load()
         txt = ''
         for item in data:
             txt += item.page_content
         text_content = txt
-        st.text(text_content)
 if __name__ == "__main__":
-    main()

 import streamlit as st
+import tempfile
+import os
+import pyperclip
 from langchain.document_loaders import UnstructuredFileLoader
 def main():
     st.title("PDF Text Extractor")
     if uploaded_file is not None:
         st.subheader("PDF Content:")
+        st.text("Extracted using langchain:")
+        temp_file_path = os.path.join(tempfile.gettempdir(), f"{uploaded_file.name}")
+        with open(temp_file_path, "wb") as temp_file:
+            temp_file.write(uploaded_file.read())
+        loader = UnstructuredFileLoader(temp_file_path)
         data = loader.load()
         txt = ''
         for item in data:
             txt += item.page_content
         text_content = txt
+        if st.button("Copy to Clipboard"):
+            pyperclip.copy(text_content)
+            st.success("Text copied to clipboard!")
+        st.text_area("Extracted Text:", value=text_content, height=300)
 if __name__ == "__main__":
+    main()