AjiNiktech committed on
Commit
ad9a31a
·
verified ·
1 Parent(s): 67e7c91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -5
app.py CHANGED
@@ -41,12 +41,33 @@ if "OPENAI_API_KEY" in os.environ:
41
  # data = data1 + data2
42
  st.header('Multiple File Upload')
43
  uploaded_files = st.file_uploader('Upload your files',accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
44
- all_documents = []
45
- for file in uploaded_files:
46
- documents = load_document(file)
47
- all_documents.extend(documents)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
49
- all_splits = text_splitter.split_documents(all_documents)
50
  embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
51
  vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
52
  retriever = vectorstore.as_retriever(k=4)
 
41
  # data = data1 + data2
42
  st.header('Multiple File Upload')
43
  uploaded_files = st.file_uploader('Upload your files',accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
44
def load_file(file):
    """Load one uploaded file into documents using the loader for its type.

    The upload's content is copied to a temporary file on disk because the
    loaders are constructed from a filesystem path. That temporary file is
    always removed before this function returns or raises.

    Args:
        file: Uploaded file object. Only ``file.name`` (for its extension)
            and ``file.getvalue()`` (the content to write) are used.

    Returns:
        The result of ``load()`` on the loader selected for the extension.

    Raises:
        ValueError: If the file extension is not a supported type.
    """
    file_extension = os.path.splitext(file.name)[1].lower()

    # Choose the loader before touching the disk so that an unsupported
    # type is rejected without creating a temporary file at all.
    if file_extension == '.txt':
        loader_cls = TextLoader
    elif file_extension == '.pdf':
        loader_cls = PyPDFLoader
    elif file_extension == '.csv':
        loader_cls = CSVLoader
    elif file_extension in ('.ppt', '.pptx'):
        loader_cls = UnstructuredPowerPointLoader
    elif file_extension in ('.doc', '.docx'):
        loader_cls = UnstructuredWordDocumentLoader
    elif file_extension in ('.xls', '.xlsx'):
        loader_cls = UnstructuredExcelLoader
    else:
        raise ValueError(f"Unsupported file type: {file_extension}")

    # delete=False keeps the file on disk after it is closed so the loader
    # can reopen it by path; removal is handled explicitly in `finally`.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=file_extension)
    try:
        with temp_file:
            temp_file.write(file.getvalue())
        # The handle is closed here, so the loader reads fully flushed data.
        return loader_cls(temp_file.name).load()
    finally:
        # Runs on success and on any failure while writing or loading, so
        # a loader error no longer leaves the temporary file behind.
        os.unlink(temp_file.name)
# Splitter that breaks documents into 1000-character chunks with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# load_file() only returns its documents; nothing at this level is named
# `documents`, so every upload has to be loaded and collected here before
# splitting.
all_documents = []
for file in uploaded_files:
    all_documents.extend(load_file(file))
all_splits = text_splitter.split_documents(all_documents)
# Embed the chunks and index them in a Chroma vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
retriever = vectorstore.as_retriever(k=4)