teagardan commited on
Commit
5fe7325
·
verified ·
1 Parent(s): 76ee088

Create lanchain pdf rag

Browse files
Files changed (1) hide show
  1. lanchain pdf rag +68 -0
lanchain pdf rag ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import PyPDF2
4
+ from langchain import LLMChain
5
+ from langchain.embeddings import OpenAIEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.llms import Llama
8
+
9
# Function to read and extract text from PDF files
def extract_text_from_pdfs(pdf_files):
    """Extract the full text of each PDF in *pdf_files*.

    Args:
        pdf_files: Iterable of filesystem paths to PDF files.

    Returns:
        list[str]: One string per input file — the concatenated text of all
        its pages (pages with no extractable text contribute "").
    """
    pdf_texts = []
    for pdf_file in pdf_files:
        with open(pdf_file, 'rb') as file:
            # PdfFileReader / numPages / getPage / extractText were removed in
            # PyPDF2 3.0 (they raise DeprecationError); PdfReader / .pages /
            # .extract_text() is the supported replacement API.
            reader = PyPDF2.PdfReader(file)
            # join() avoids quadratic += concatenation over many pages;
            # extract_text() can return None for image-only pages, hence `or ""`.
            pdf_texts.append(
                "".join(page.extract_text() or "" for page in reader.pages)
            )
    return pdf_texts
20
+
21
# Function to create vector store from extracted texts
def create_vector_store(texts):
    """Build a FAISS vector store over *texts* using OpenAI embeddings.

    Args:
        texts: Iterable of document strings to index.

    Returns:
        A FAISS vector store built from the given texts.
    """
    return FAISS.from_texts(texts, OpenAIEmbeddings())
26
+
27
# Function to create RAG application
def create_rag_application(pdf_files):
    """Build a retrieval-augmented-generation chain over *pdf_files*.

    Pipeline: extract text from each PDF, index the texts in a FAISS vector
    store, load an LLM, and wire both into a chain.

    Args:
        pdf_files: Iterable of filesystem paths to PDF files.

    Returns:
        The constructed chain object.
    """
    # Step 1: Extract text from PDFs
    pdf_texts = extract_text_from_pdfs(pdf_files)

    # Step 2: Create vector store
    vector_store = create_vector_store(pdf_texts)

    # Step 3: Load LLAMA model
    # NOTE(review): langchain.llms provides wrappers such as LlamaCpp and
    # Ollama, not a class named `Llama` — confirm the installed langchain
    # version actually exports `Llama`, otherwise the import at the top of
    # the file fails.
    llm = Llama(model_name="llama-3.1")

    # Step 4: Create LLMChain with vector store and LLAMA model
    # NOTE(review): LLMChain accepts `llm` and `prompt`; it has no
    # `vector_store` parameter. A retrieval chain (e.g. RetrievalQA with
    # vector_store.as_retriever()) is presumably what was intended — verify.
    rag_chain = LLMChain(llm=llm, vector_store=vector_store)

    return rag_chain
42
+
43
# Example usage
# NOTE(review): the single entry below looks like a directory path, while the
# trailing comment claims 100 PDF file paths — extract_text_from_pdfs opens
# each entry as a file, so this should be a list of actual *.pdf paths; confirm.
pdf_files = ["/Users/teagardan/Documents/Teagardan/Mission USA/CURSOR AI/LANGCHAIN LLM RAG"] # List of 100 PDF file paths
rag_application = create_rag_application(pdf_files)
46
+
47
+ import ollama
48
+
49
# Function to create RAG application using Ollama
def create_rag_application_with_ollama(pdf_files):
    """Build a retrieval-augmented-generation chain over *pdf_files*,
    loading the LLM through the `ollama` package instead of langchain.llms.

    Args:
        pdf_files: Iterable of filesystem paths to PDF files.

    Returns:
        The constructed chain object.
    """
    # Step 1: Extract text from PDFs
    pdf_texts = extract_text_from_pdfs(pdf_files)

    # Step 2: Create vector store
    vector_store = create_vector_store(pdf_texts)

    # Step 3: Load LLAMA model using Ollama
    # NOTE(review): the `ollama` Python package exposes functions such as
    # chat()/generate(), not a `Llama` class — confirm this attribute exists
    # in the installed version; as written this raises AttributeError.
    llm = ollama.Llama(model_name="llama-3.1")

    # Step 4: Create LLMChain with vector store and LLAMA model
    # NOTE(review): LLMChain takes `llm` and `prompt`, not `vector_store`;
    # a retrieval chain is presumably intended here as well — verify.
    rag_chain = LLMChain(llm=llm, vector_store=vector_store)

    return rag_chain
64
+
65
# Example usage with Ollama
# NOTE(review): this rebinds the module-level `pdf_files` but never calls
# create_rag_application_with_ollama — presumably the call was left out.
# Same path concern as above: a directory, not a list of PDF file paths.
pdf_files = ["/Users/teagardan/Documents/Teagardan/Mission USA/CURSOR AI/LANGCHAIN LLM RAG"] # List of 100 PDF file paths
67
+
68
+