Uzaiir committed on
Commit
37567cd
·
verified ·
1 Parent(s): c11a592

Update src/PDFprocess_sample.py

Browse files
Files changed (1) hide show
  1. src/PDFprocess_sample.py +15 -15
src/PDFprocess_sample.py CHANGED
@@ -1,7 +1,7 @@
1
  import tempfile
2
  import streamlit as st
3
  import pickle
4
- # from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
  from langchain_community.document_loaders import PyPDFLoader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import FAISS
@@ -73,22 +73,22 @@ def process_pdf(uploaded_files):
73
  doc = loader.load()
74
  main_placeholder.text("Text Splitter...Started...βœ…βœ…βœ…")
75
 
76
- # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
77
- # final_documents = text_splitter.split_documents(doc)
78
- # all_documents.extend(final_documents)
79
 
80
- # if all_documents:
81
- # main_placeholder.text("Embedding Vector Started Building...βœ…βœ…βœ…")
82
 
83
- # # ⏬ Move embedding initialization here
84
- # st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
85
- # st.session_state.vectors = FAISS.from_documents(all_documents, st.session_state.embeddings)
86
- # st.session_state.docs = all_documents
87
 
88
- # faiss_index = st.session_state.vectors.index
89
- # faiss.write_index(faiss_index, "faiss_index.bin")
90
- # main_placeholder.text("Vector database created!...βœ…βœ…βœ…")
91
 
92
- # else:
93
- # st.error("No documents found or the PDF is corrupted.")
94
 
 
1
  import tempfile
2
  import streamlit as st
3
  import pickle
4
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
  from langchain_community.document_loaders import PyPDFLoader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import FAISS
 
73
  doc = loader.load()
74
  main_placeholder.text("Text Splitter...Started...βœ…βœ…βœ…")
75
 
76
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
77
+ final_documents = text_splitter.split_documents(doc)
78
+ all_documents.extend(final_documents)
79
 
80
+ if all_documents:
81
+ main_placeholder.text("Embedding Vector Started Building...βœ…βœ…βœ…")
82
 
83
+ # ⏬ Move embedding initialization here
84
+ st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
85
+ st.session_state.vectors = FAISS.from_documents(all_documents, st.session_state.embeddings)
86
+ st.session_state.docs = all_documents
87
 
88
+ faiss_index = st.session_state.vectors.index
89
+ faiss.write_index(faiss_index, "faiss_index.bin")
90
+ main_placeholder.text("Vector database created!...βœ…βœ…βœ…")
91
 
92
+ else:
93
+ st.error("No documents found or the PDF is corrupted.")
94