HAOUARI Noureddine committed on
Commit
6ce362f
·
1 Parent(s): a74e11e

first commit

Browse files
Files changed (2) hide show
  1. app.py +45 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PyPDF2 import PdfReader
2
+ from concurrent.futures import ThreadPoolExecutor
3
+ import streamlit as st
4
+ import io
5
+ from anthropic import Anthropic
6
# ---- Sidebar: API configuration -------------------------------------------
st.sidebar.title("API Configuration")

# Mask the field so the secret is not shown in clear text on screen.
api_key = st.sidebar.text_input(
    "Enter your Anthropic API key:", type="password")

# Hand the key entered in the sidebar to the client. An empty field is turned
# into None so that the client's own default key lookup still applies, exactly
# as it did when the client was built with no arguments.
client = Anthropic(api_key=api_key or None)
10
+
11
+
12
def convert_pdf_to_text(pdf_file_data, file_name):
    """Return the text of one PDF framed by ``---`` delimiter lines.

    The result starts with a delimiter, followed by a small header naming the
    file, the extracted text of every page concatenated back to back (no
    separator is inserted between pages), and a closing delimiter.

    Args:
        pdf_file_data: The PDF source handed to ``PdfReader``.
        file_name: Name written into the header of the returned text.

    Returns:
        The framed text as a single string.
    """
    delimiter = "\n---\n"
    header = f"file name: {file_name}\n content: \n"
    reader = PdfReader(pdf_file_data)
    page_texts = [page.extract_text() for page in reader.pages]
    return "".join([delimiter, header, *page_texts, delimiter])
20
+
21
+
22
def pdf_to_text(pdf_files_data, file_names):
    """Convert several PDFs to text concurrently on a thread pool.

    ``pdf_files_data`` and ``file_names`` are walked in lockstep; each pair is
    passed to ``convert_pdf_to_text``.

    Args:
        pdf_files_data: Iterable of PDF sources, one per document.
        file_names: Iterable of the matching file names.

    Returns:
        The iterator produced by ``Executor.map``; it yields the converted
        texts in input order.
    """
    pool = ThreadPoolExecutor()
    try:
        # One conversion task per (data, name) pair.
        converted = pool.map(convert_pdf_to_text, pdf_files_data, file_names)
    finally:
        # Wait for every task to finish before handing the iterator back.
        pool.shutdown(wait=True)
    return converted
29
+
30
+
31
# ---- Page body: upload PDFs, convert on demand, report the token count ----
st.title("PDF to Text Converter")
st.markdown("Upload PDF files and get their content in text format.")

uploaded_files = st.file_uploader(
    "Upload PDF files", type="pdf", accept_multiple_files=True)

if uploaded_files:
    # The button is only offered once at least one file has been uploaded.
    if st.button('Convert'):
        # Copy the uploads only when a conversion is actually requested.
        # getvalue() returns the complete buffer regardless of the current
        # stream position, whereas read() returns nothing once the stream has
        # already been consumed.
        pdf_files_data = [io.BytesIO(uploaded_file.getvalue())
                          for uploaded_file in uploaded_files]
        file_names = [uploaded_file.name for uploaded_file in uploaded_files]
        with st.spinner('Converting PDFs...'):
            # Join the per-file texts into the single string that is shown
            # and measured below.
            text = "\n".join(pdf_to_text(pdf_files_data, file_names))
            st.text_area("Text content:", text, height=200)
            st.write(f"Number of tokens: {client.count_tokens(text)}")
requirements.txt ADDED
Binary file (60 Bytes). View file