JARVISXIRONMAN committed on
Commit
f4482a3
·
verified ·
1 Parent(s): ac20d3a

Create components/upload_files.py

Browse files
Files changed (1) hide show
  1. components/upload_files.py +43 -0
components/upload_files.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # components/upload_files.py
2
+
3
+ import streamlit as st
4
+ import os
5
+ from langchain.document_loaders import PyPDFLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.vectorstores import Chroma
8
+ from langchain.embeddings import HuggingFaceEmbeddings
9
+
10
def run_upload_files():
    """Render the PDF upload panel and index the uploaded document.

    Shows a Streamlit file uploader restricted to PDFs. When a file is
    provided it is saved under ``data/user_docs``, loaded page by page,
    split into overlapping chunks, embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model and written to a Chroma
    store persisted at ``data/vectorstore``. The resulting store is published
    as ``st.session_state.vectorstore``.

    The function returns ``None``; all results are communicated through the
    Streamlit UI and ``st.session_state``.
    """
    import hashlib  # local import: only needed for the re-run guard below

    st.header("📂 Upload Business Documents")

    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if not uploaded_file:
        return

    # getvalue() returns the full buffer regardless of the current read
    # position, so a previously consumed buffer cannot yield an empty file.
    data = uploaded_file.getvalue()

    # Streamlit re-executes the script on every interaction. Without this
    # guard the same upload would be re-embedded and appended to the
    # persistent store again on each re-run, producing duplicate vectors.
    fingerprint = hashlib.sha256(data).hexdigest()
    already_indexed = (
        st.session_state.get("_indexed_upload_sha256") == fingerprint
        and "vectorstore" in st.session_state
    )
    if already_indexed:
        st.success("✅ Document processed and added to your knowledge base.")
        return

    with st.spinner("Processing document..."):
        os.makedirs("data/user_docs", exist_ok=True)

        # The file name comes from the client; keep only its last path
        # component so it cannot escape the upload directory.
        safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
        file_path = os.path.join("data/user_docs", safe_name)
        with open(file_path, "wb") as f:
            f.write(data)

        # Load raw pages only. load_and_split() would already chunk the text
        # with a default splitter, and the explicit splitter below would then
        # split those chunks a second time.
        loader = PyPDFLoader(file_path)
        pages = loader.load()

        # Split into chunks
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        docs = splitter.split_documents(pages)

        if not docs:
            # E.g. a scanned PDF without a text layer: nothing to embed.
            st.warning("No extractable text was found in this PDF.")
            return

        # Create embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        vectordb = Chroma.from_documents(
            documents=docs,
            embedding=embeddings,
            persist_directory="data/vectorstore",
        )
        # NOTE(review): newer Chroma releases persist automatically and
        # deprecate this call — confirm against the pinned version.
        vectordb.persist()
        st.session_state.vectorstore = vectordb
        st.session_state["_indexed_upload_sha256"] = fingerprint

    st.success("✅ Document processed and added to your knowledge base.")