ChinarQ-AI committed on
Commit
a21fea9
·
verified ·
1 Parent(s): 6121b60

Delete PDFprocess_sample.py

Browse files
Files changed (1) hide show
  1. PDFprocess_sample.py +0 -49
PDFprocess_sample.py DELETED
@@ -1,49 +0,0 @@
1
- import tempfile
2
- import streamlit as st
3
- import pickle
4
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
- from langchain_community.document_loaders import PyPDFLoader
6
- from langchain.text_splitter import RecursiveCharacterTextSplitter
7
- from langchain_community.vectorstores import FAISS
8
- import faiss
9
-
10
-
11
- def process_pdf(uploaded_file):
12
-
13
- all_documents = []
14
- st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
15
-
16
- main_placeholder = st.empty()
17
- # Creating a temporary file to store the uploaded PDF's
18
- main_placeholder.text("Data Loading...Started...βœ…βœ…βœ…")
19
- for uploaded_file in uploaded_file:
20
- with tempfile.NamedTemporaryFile(delete=False , suffix='.pdf') as temp_file:
21
- temp_file.write(uploaded_file.read()) ## write file to temporary
22
- temp_file_path = temp_file.name # Get the temporary file path
23
-
24
-
25
- # Load the PDF's from the temporary file path
26
-
27
-
28
- loader = PyPDFLoader(temp_file_path) # Document loader
29
- doc= loader.load() # load Document
30
- main_placeholder.text("Text Splitter...Started...βœ…βœ…βœ…")
31
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) # Recursive Character String
32
- #final_documents = text_splitter.split_documents(doc)# splitting
33
- final_documents = text_splitter.split_documents(doc)
34
- all_documents.extend(final_documents)
35
-
36
-
37
- if all_documents:
38
- main_placeholder.text("Embedding Vector Started Building...βœ…βœ…βœ…")
39
- st.session_state.vectors = FAISS.from_documents(all_documents,st.session_state.embeddings)
40
- st.session_state.docs = all_documents
41
-
42
- # Save FAISS vector store to disk
43
- faiss_index = st.session_state.vectors.index # Extract FAISS index
44
- faiss.write_index(faiss_index, "faiss_index.bin") # Save index to a binary file
45
- main_placeholder.text("Vector database created!...βœ…βœ…βœ…")
46
-
47
- else:
48
- st.error("No documents found after processing the uploaded files or the pdf is corrupted / unsupported.")
49
-