Uzaiir committed on
Commit
2d42345
·
verified ·
1 Parent(s): cb666fa

Update src/PDFprocess_sample.py

Browse files
Files changed (1) hide show
  1. src/PDFprocess_sample.py +15 -15
src/PDFprocess_sample.py CHANGED
@@ -1,7 +1,7 @@
1
  import tempfile
2
  import streamlit as st
3
  import pickle
4
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
  from langchain_community.document_loaders import PyPDFLoader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import FAISS
@@ -73,22 +73,22 @@ def process_pdf(uploaded_files):
73
  doc = loader.load()
74
  main_placeholder.text("Text Splitter...Started...✅✅✅")
75
 
76
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
77
- final_documents = text_splitter.split_documents(doc)
78
- all_documents.extend(final_documents)
79
 
80
- if all_documents:
81
- main_placeholder.text("Embedding Vector Started Building...✅✅✅")
82
 
83
- # ⏬ Move embedding initialization here
84
- st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
85
- st.session_state.vectors = FAISS.from_documents(all_documents, st.session_state.embeddings)
86
- st.session_state.docs = all_documents
87
 
88
- faiss_index = st.session_state.vectors.index
89
- faiss.write_index(faiss_index, "faiss_index.bin")
90
- main_placeholder.text("Vector database created!...✅✅✅")
91
 
92
- else:
93
- st.error("No documents found or the PDF is corrupted.")
94
 
 
1
  import tempfile
2
  import streamlit as st
3
  import pickle
4
+ # from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
  from langchain_community.document_loaders import PyPDFLoader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_community.vectorstores import FAISS
 
73
  doc = loader.load()
74
  main_placeholder.text("Text Splitter...Started...✅✅✅")
75
 
76
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
77
+ # final_documents = text_splitter.split_documents(doc)
78
+ # all_documents.extend(final_documents)
79
 
80
+ # if all_documents:
81
+ # main_placeholder.text("Embedding Vector Started Building...✅✅✅")
82
 
83
+ # # ⏬ Move embedding initialization here
84
+ # st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
85
+ # st.session_state.vectors = FAISS.from_documents(all_documents, st.session_state.embeddings)
86
+ # st.session_state.docs = all_documents
87
 
88
+ # faiss_index = st.session_state.vectors.index
89
+ # faiss.write_index(faiss_index, "faiss_index.bin")
90
+ # main_placeholder.text("Vector database created!...✅✅✅")
91
 
92
+ # else:
93
+ # st.error("No documents found or the PDF is corrupted.")
94