ngcanh committed on
Commit
201bc19
·
verified ·
1 Parent(s): c45d1f5

Update pages/management.py

Browse files
Files changed (1) hide show
  1. pages/management.py +54 -54
pages/management.py CHANGED
@@ -1,68 +1,68 @@
1
- import os
2
- import streamlit as st
3
- from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
4
- from langchain.text_splitter import CharacterTextSplitter
5
- from app import vectorstore
6
 
7
 
8
- st.title("Document Management")
9
 
10
- # File uploader
11
- uploaded_file = st.file_uploader("Choose a file", type=['txt', 'pdf', 'docx'])
12
 
13
- if uploaded_file is not None:
14
- # Create a temporary directory to store the uploaded file
15
- temp_dir = "temp_uploads"
16
- os.makedirs(temp_dir, exist_ok=True)
17
- file_path = os.path.join(temp_dir, uploaded_file.name)
18
 
19
- # Save the uploaded file temporarily
20
- with open(file_path, "wb") as f:
21
- f.write(uploaded_file.getbuffer())
22
 
23
- st.success(f"File {uploaded_file.name} successfully uploaded!")
24
 
25
- # Process the uploaded file
26
- if st.button("Process Document"):
27
- with st.spinner("Processing document..."):
28
- try:
29
- # Load the document based on file type
30
- if uploaded_file.type == "application/pdf":
31
- loader = PyPDFLoader(file_path)
32
- elif uploaded_file.type == "text/plain":
33
- loader = TextLoader(file_path)
34
- else:
35
- st.error("Unsupported file type.")
36
- st.stop()
37
 
38
- documents = loader.load()
39
 
40
- # Split the document into chunks
41
- text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=150)
42
- texts = text_splitter.split_documents(documents)
43
 
44
- # Add the chunks to the vectorstore
45
- vectorstore.add_documents(texts)
46
 
47
- st.success(f"Document processed and added to the knowledge base!")
48
- except Exception as e:
49
- st.error(f"An error occurred: {e}")
50
 
51
- # Clean up: remove the temporary file
52
- os.remove(file_path)
53
 
54
- # Display current documents in the knowledge base
55
- # st.subheader("Current Documents in Knowledge Base")
56
- # # This is a placeholder. You'll need to implement a method to retrieve and display
57
- # # the list of documents currently in your Chroma database.
58
- # st.write("Placeholder for document list")
59
 
60
- # # Option to clear the entire knowledge base
61
- # if st.button("Clear Knowledge Base"):
62
- # if st.sidebar.checkbox("Are you sure you want to clear the entire knowledge base? This action cannot be undone."):
63
- # try:
64
- # # Clear the Chroma database
65
- # vectorstore.delete()
66
- # st.success("Knowledge base cleared!")
67
- # except Exception as e:
68
- # st.error(f"An error occurred: {e}")
 
1
+ # import os
2
+ # import streamlit as st
3
+ # from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
4
+ # from langchain.text_splitter import CharacterTextSplitter
5
+ # from app import vectorstore
6
 
7
 
8
+ # st.title("Document Management")
9
 
10
+ # # File uploader
11
+ # uploaded_file = st.file_uploader("Choose a file", type=['txt', 'pdf', 'docx'])
12
 
13
+ # if uploaded_file is not None:
14
+ # # Create a temporary directory to store the uploaded file
15
+ # temp_dir = "temp_uploads"
16
+ # os.makedirs(temp_dir, exist_ok=True)
17
+ # file_path = os.path.join(temp_dir, uploaded_file.name)
18
 
19
+ # # Save the uploaded file temporarily
20
+ # with open(file_path, "wb") as f:
21
+ # f.write(uploaded_file.getbuffer())
22
 
23
+ # st.success(f"File {uploaded_file.name} successfully uploaded!")
24
 
25
+ # # Process the uploaded file
26
+ # if st.button("Process Document"):
27
+ # with st.spinner("Processing document..."):
28
+ # try:
29
+ # # Load the document based on file type
30
+ # if uploaded_file.type == "application/pdf":
31
+ # loader = PyPDFLoader(file_path)
32
+ # elif uploaded_file.type == "text/plain":
33
+ # loader = TextLoader(file_path)
34
+ # else:
35
+ # st.error("Unsupported file type.")
36
+ # st.stop()
37
 
38
+ # documents = loader.load()
39
 
40
+ # # Split the document into chunks
41
+ # text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=150)
42
+ # texts = text_splitter.split_documents(documents)
43
 
44
+ # # Add the chunks to the vectorstore
45
+ # vectorstore.add_documents(texts)
46
 
47
+ # st.success(f"Document processed and added to the knowledge base!")
48
+ # except Exception as e:
49
+ # st.error(f"An error occurred: {e}")
50
 
51
+ # # Clean up: remove the temporary file
52
+ # os.remove(file_path)
53
 
54
+ # # Display current documents in the knowledge base
55
+ # # st.subheader("Current Documents in Knowledge Base")
56
+ # # # This is a placeholder. You'll need to implement a method to retrieve and display
57
+ # # # the list of documents currently in your Chroma database.
58
+ # # st.write("Placeholder for document list")
59
 
60
+ # # # Option to clear the entire knowledge base
61
+ # # if st.button("Clear Knowledge Base"):
62
+ # # if st.sidebar.checkbox("Are you sure you want to clear the entire knowledge base? This action cannot be undone."):
63
+ # # try:
64
+ # # # Clear the Chroma database
65
+ # # vectorstore.delete()
66
+ # # st.success("Knowledge base cleared!")
67
+ # # except Exception as e:
68
+ # # st.error(f"An error occurred: {e}")