Spaces:

haouarin
/

pdf2text

Runtime error

App Files Files Community

HAOUARI Noureddine committed on Aug 4, 2023

Commit

6ce362f

1 Parent(s): a74e11e

first commit

Browse files

Files changed (2) hide show

app.py +45 -0
requirements.txt +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,45 @@

+from PyPDF2 import PdfReader
+from concurrent.futures import ThreadPoolExecutor
+import streamlit as st
+import io
+from anthropic import Anthropic
+client = Anthropic()
+st.sidebar.title("API Configuration")
+api_key = st.sidebar.text_input("Enter your Open API key:")
+def convert_pdf_to_text(pdf_file_data, file_name):
+    text = "\n---\n"
+    text += f"file name: {file_name}\n content: \n"
+    pdf_reader = PdfReader(pdf_file_data)
+    # Extract all text at once
+    text += "".join([page.extract_text() for page in pdf_reader.pages])
+    text += "\n---\n"
+    return text
+def pdf_to_text(pdf_files_data, file_names):
+    # Create a ThreadPoolExecutor to run the conversion in parallel
+    with ThreadPoolExecutor() as executor:
+        # Use the executor to map the convert_pdf_to_text function over all the pdf_files_data
+        results = executor.map(convert_pdf_to_text, pdf_files_data, file_names)
+    return results
+st.title("PDF to Text Converter")
+st.markdown("Upload PDF files and get their content in text format.")
+uploaded_files = st.file_uploader(
+    "Upload PDF files", type="pdf", accept_multiple_files=True)
+if uploaded_files:
+    pdf_files_data = [io.BytesIO(uploaded_file.read())
+                      for uploaded_file in uploaded_files]
+    file_names = [uploaded_file.name for uploaded_file in uploaded_files]
+    if st.button('Convert'):
+        with st.spinner('Converting PDFs...'):
+            text = "\n".join(pdf_to_text(pdf_files_data, file_names))
+        st.text_area("Text content:", text, height=200)
+        st.write(f"Number of tokens: {client.count_tokens(text)}")

requirements.txt ADDED Viewed

Binary file (60 Bytes). View file