kartik2627 committed on
Commit
d56be15
·
verified ·
1 Parent(s): e142560

create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import PyPDF2
3
+ import faiss
4
+ import numpy as np
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.llms import OpenAI
8
+ from langchain.chains import RAGChain
9
+
10
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; ``main`` passes the
            object returned by ``st.file_uploader``.

    Returns:
        str: The page texts joined with no separator. A page that yields no
        text contributes an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # `or ""` guards against a page whose extraction yields None (or any
    # other falsy value), which would otherwise break the concatenation.
    # NOTE(review): whether None can occur depends on the PyPDF2 version.
    return "".join(page.extract_text() or "" for page in reader.pages)
16
+
17
def create_embeddings(text):
    """Embed ``text`` as one document using the ``all-MiniLM-L6-v2`` model.

    Args:
        text: The string to embed. It is submitted as a one-element batch,
            so the whole string is treated as a single document.

    Returns:
        Whatever ``HuggingFaceEmbeddings.embed_documents`` returns for that
        one-element batch.
    """
    # NOTE(review): the entire document becomes a single embedding here;
    # presumably chunking is wanted for meaningful retrieval -- confirm.
    model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return model.embed_documents([text])
21
+
22
def create_faiss_index(embeddings):
    """Build a flat L2 FAISS index holding the given embedding vectors.

    Args:
        embeddings: A non-empty, rectangular batch of numeric vectors
            (for example a list of equal-length lists of floats).

    Returns:
        A ``faiss.IndexFlatL2`` to which every row of ``embeddings`` has
        been added.

    Raises:
        ValueError: If ``embeddings`` is empty, is not a 2-D batch, or has
            zero-length vectors.
    """
    # FAISS consumes float32 matrices; convert once and validate the shape
    # up front so bad input fails with a clear message instead of an
    # IndexError/TypeError from indexing into the raw sequence.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2 or vectors.shape[0] == 0 or vectors.shape[1] == 0:
        raise ValueError(
            "embeddings must be a non-empty 2-D batch of vectors, "
            f"got array with shape {vectors.shape}"
        )

    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index
28
+
29
def create_rag_chain(index):
    """Construct the chain used to answer queries against ``index``.

    Args:
        index: Forwarded unchanged as the chain's ``vector_store``; ``main``
            supplies the object returned by ``create_faiss_index``.

    Returns:
        The ``RAGChain`` instance built from an ``OpenAI`` LLM and ``index``.
    """
    # NOTE(review): confirm that the installed LangChain version actually
    # exports ``RAGChain`` from ``langchain.chains`` and that it accepts a
    # raw FAISS index as ``vector_store``.
    # NOTE(review): "gpt-3.5-turbo" looks like a chat model name; verify it
    # is valid for the ``OpenAI`` LLM class in use.
    language_model = OpenAI(model="gpt-3.5-turbo")
    return RAGChain(llm=language_model, vector_store=index)
33
+
34
def retrieve_and_generate(query, rag_chain):
    """Run ``query`` through ``rag_chain`` and return its result.

    Args:
        query: The value handed to ``rag_chain.run``.
        rag_chain: Any object exposing a ``run(query)`` method.

    Returns:
        Whatever ``rag_chain.run(query)`` returns.
    """
    return rag_chain.run(query)
37
+
38
def main():
    """Render the Streamlit page: upload a PDF, preview it, answer queries.

    Streamlit re-executes this function on every widget interaction (for
    example each time the query box changes). The extracted text and the
    chain built from it are therefore kept in ``st.session_state`` and
    reused until a PDF with different content is uploaded, instead of
    re-parsing and re-embedding the document on every rerun.
    """
    # Local import: only needed here, to fingerprint the uploaded bytes.
    import hashlib

    st.title("RAG Application with FAISS & PDF")

    pdf_file = st.file_uploader("Upload your PDF document", type="pdf")
    if pdf_file is None:
        return

    # Key the cache on the file's content so a new upload (even one with
    # the same name) invalidates the previous text and chain.
    digest = hashlib.sha256(pdf_file.getvalue()).hexdigest()
    state = st.session_state.get("rag_state")
    if state is None or state["digest"] != digest:
        state = {"digest": digest, "text": extract_text_from_pdf(pdf_file)}
        st.session_state["rag_state"] = state

    st.subheader("Extracted Text from PDF")
    # Only a 1000-character preview is shown; the full text is embedded.
    st.write(state["text"][:1000])

    if "rag_chain" not in state:
        embeddings = create_embeddings(state["text"])
        index = create_faiss_index(embeddings)
        # NOTE(review): assumes the chain can be reused across queries.
        state["rag_chain"] = create_rag_chain(index)

    query = st.text_input("Enter your query:")
    if query:
        response = retrieve_and_generate(query, state["rag_chain"])
        st.subheader("Answer from RAG Model:")
        st.write(response)
58
+
59
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()