ChinarQ-AI committed on
Commit
fa3ff28
·
verified ·
1 Parent(s): c8db483

Upload 3 files

Browse files
Files changed (3) hide show
  1. src/CSS/style.css +85 -0
  2. src/PDFprocess_sample.py +49 -0
  3. src/app.py +132 -0
src/CSS/style.css ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * Custom styling for the Streamlit PDF assistant.
 *
 * NOTE(review): the `.st-emotion-cache-*` and `.st-cr` selectors look like
 * class names generated by Streamlit's styling layer; they presumably change
 * between Streamlit releases, so re-check them after upgrading.
 */

/* ---- Green (#28a745) backgrounds ---- */
.st-emotion-cache-bm2z3a {
    background-color: #28a745;
}

.st-emotion-cache-12fmjuu {
    background-color: #28a745;
}

/* ---- Blue (#007bff) background ---- */
.st-emotion-cache-6qob1r {
    background-color: #007bff;
}

.st-emotion-cache-1iqhbn7 {
    background-color: #28a745;
}

.st-emotion-cache-1jfa4hj {
    background-color: #28a745;
}

.st-emotion-cache-taue2i {
    background-color: #007bff;
}

/* ---- Rounded element with a green hover state ---- */
.st-emotion-cache-n5r31u {
    border-radius: 25px;
}

.st-emotion-cache-n5r31u:hover {
    border-color: #28a745;
    color: #28a745;
}

.st-emotion-cache-1bps1dx:hover {
    background-color: #007bff;
}

/* ---- Page headings: centred, thin, light text ---- */
h1 {
    color: aliceblue;
    display: flex;
    justify-content: center;
    font-weight: 100;
}

.st-emotion-cache-1v6glgu > ul[role="listbox"]:not(:last-child) {
    background-color: #28a745;
}

.st-emotion-cache-1iqhbn7:hover {
    background-color: #28a745;
}

.st-cr {
    border: 1px solid #28a745;
}

/* ---- Body text ---- */
p, ol, ul, dl {
    margin: 0px 0px 1rem;
    padding: 0px;
    /* Was "1 rem": a space between number and unit makes the value invalid,
       so the whole declaration was being ignored. */
    font-size: 1rem;
    font-weight: 400;
    color: rgb(0, 0, 0);
}

.st-emotion-cache-13ln4jf {
    width: 100%;
    padding: 2rem 1rem 10rem;
    max-width: 68rem;
}

/* ---- Answer card emitted by app.py (<div class="card"> / "response") ---- */
.card {
    background-color: #f8f9fa;
    border-radius: 10px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    padding: 20px;
    margin-bottom: 20px;
}

.response {
    font-size: 18px;
    font-weight: bold;
    margin-bottom: 10px;
    color: #333333;
}
src/PDFprocess_sample.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import streamlit as st
3
+ import pickle
4
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_community.vectorstores import FAISS
8
+ import faiss
9
+
10
+
11
def process_pdf(uploaded_file):
    """Build a FAISS vector store from the uploaded PDF files.

    Each upload is copied to a temporary ``.pdf`` file so that
    ``PyPDFLoader`` can read it from a filesystem path, then loaded and
    split into overlapping chunks. All chunks are embedded together into a
    single FAISS store.

    Args:
        uploaded_file: Iterable of uploaded files, each exposing ``read()``
            (the caller passes the list returned by the multi-file
            ``st.sidebar.file_uploader``).

    Side effects:
        * Sets ``st.session_state.embeddings``.
        * On success sets ``st.session_state.vectors`` and
          ``st.session_state.docs`` and writes ``faiss_index.bin`` to the
          current working directory.
        * Shows progress text, or an error when no chunks were produced.
    """
    import os  # local import: this module does not import os at file level

    all_documents = []
    st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    main_placeholder = st.empty()
    main_placeholder.text("Data Loading...Started...✅✅✅")

    # The splitter configuration is identical for every file, so build it once.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    # Use a distinct loop name so the ``uploaded_file`` argument is not shadowed.
    for pdf_file in uploaded_file:
        # delete=False keeps the file on disk after the handle is closed, so
        # the loader can reopen it by path; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
            temp_file.write(pdf_file.read())
            temp_file_path = temp_file.name

        try:
            # Load only after the handle is closed so all bytes are flushed.
            pages = PyPDFLoader(temp_file_path).load()
        finally:
            # Previously these temporary files were never cleaned up.
            os.remove(temp_file_path)

        main_placeholder.text("Text Splitter...Started...✅✅✅")
        all_documents.extend(text_splitter.split_documents(pages))

    if all_documents:
        main_placeholder.text("Embedding Vector Started Building...✅✅✅")
        st.session_state.vectors = FAISS.from_documents(all_documents, st.session_state.embeddings)
        st.session_state.docs = all_documents

        # Persist the index. Only the raw FAISS index object is written here,
        # not the chunk texts held in ``st.session_state.docs``.
        faiss_index = st.session_state.vectors.index
        faiss.write_index(faiss_index, "faiss_index.bin")
        main_placeholder.text("Vector database created!...✅✅✅")
    else:
        st.error("No documents found after processing the uploaded files or the pdf is corrupted / unsupported.")
49
+
src/app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain_groq import ChatGroq
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.chains.combine_documents import create_stuff_documents_chain
6
+ from langchain_core.prompts import ChatPromptTemplate
7
+ from langchain.chains import create_retrieval_chain
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
10
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
11
+ from dotenv import load_dotenv
12
+ from PDFprocess_sample import process_pdf
13
+
14
# Load the GROQ and Google API keys from the environment / .env file.
load_dotenv()

GROQ_API_KEY = os.getenv('GROQ_API_KEY')
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

# os.environ only accepts string values, so assigning the result of
# os.getenv() directly raised an opaque TypeError when the key was missing.
# Report the missing key(s) in the UI and halt this script run instead.
_missing_keys = [
    key_name
    for key_name, key_value in (
        ("GROQ_API_KEY", GROQ_API_KEY),
        ("GOOGLE_API_KEY", GOOGLE_API_KEY),
    )
    if not key_value
]
if _missing_keys:
    st.error(
        "Missing API key(s): " + ", ".join(_missing_keys)
        + ". Set them in the environment or in a .env file."
    )
    st.stop()

os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
19
+
20
+ #Loading CSS files
21
+
22
def load_css(file_name):
    """Inject the stylesheet stored at *file_name* into the Streamlit page.

    Args:
        file_name: Path of the CSS file. A relative path is first tried
            against the current working directory (the original behaviour);
            if nothing exists there it is resolved against the directory
            containing this script, so the app also starts when launched
            from outside ``src/``.

    Raises:
        OSError: If the file cannot be opened at the resolved location.
    """
    css_path = file_name
    if not os.path.isabs(css_path) and not os.path.exists(css_path):
        script_dir = os.path.dirname(os.path.abspath(__file__))
        css_path = os.path.join(script_dir, file_name)

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(css_path, encoding="utf-8") as f:
        css = f.read()

    # unsafe_allow_html is needed so the <style> tag is emitted as raw HTML.
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
26
+
27
load_css('CSS/style.css')

# Chat model (served through Groq) that answers from the retrieved context.
llm = ChatGroq(
    api_key=GROQ_API_KEY,
    model_name="Llama3-8b-8192"
)

# Prompt shared by both interaction modes. {context} is filled with the
# retrieved document chunks and {input} with the user's request.
# The context section is now closed with </context>; it previously ended
# with a second opening <context> tag, leaving the block unterminated.
prompt = ChatPromptTemplate.from_template(
    """
    Answer the questions based on the provided context only.
    Please provide the most accurate response based on the question. Try to answer in detail in 1500 words
    <context>
    {context}
    </context>
    Questions: {input}
    """
)
46
+
47
# Sidebar controls. Widgets are created in the same order as before because
# Streamlit lays elements out in call order.
sidebar = st.sidebar

input_method = sidebar.selectbox(
    "Choose a method",
    ["Choose input method...", "Interact with Doc", "Get Ques from Doc"],
)

sidebar.title("Upload your pdf")

# Empty slot reserved in the main area.
main_placeholder = st.empty()

# PDF upload (several files may be selected at once).
uploaded_file = sidebar.file_uploader(
    "_____________________________________",
    type="pdf",
    accept_multiple_files=True,
)
sidebar.write("Press Submit to process:")
process = sidebar.button("Submit")

# On Submit: turn the uploads into vectors, or ask for a file if none given.
if process and uploaded_file:
    process_pdf(uploaded_file)
elif process:
    st.warning("Please upload a PDF file.")
66
+
67
def _answer_from_documents(query):
    """Run *query* through the retrieval chain and show the answer in a card.

    Reads ``st.session_state.vectors``, so the caller must check that the
    vector store exists before calling.
    """
    import html  # local import: html is not imported at file level

    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    response = retrieval_chain.invoke({'input': query})

    # The answer is derived from uploaded documents and is rendered with
    # unsafe_allow_html, so escape it to keep stray markup from being
    # injected into the page.
    answer = html.escape(response['answer'], quote=False)
    st.markdown(
        '<div class="card">\n'
        f'<div class="response">{answer}</div>\n'
        '</div>',
        unsafe_allow_html=True,
    )


_NO_VECTORS_MESSAGE = "Please upload a PDF and press Submit before asking."

if input_method == "Choose input method...":
    st.title("Welcome You all!")
    st.title("Choose an option in the sidebar")
    st.title("Now, let's get started!")

# The user wants to ask free-form questions about the documents.
elif input_method == "Interact with Doc":
    st.title("let's Interact with pdf's")

    prompt1 = st.text_input("______", placeholder="Enter your Question")

    if prompt1:
        if "vectors" in st.session_state:
            _answer_from_documents(prompt1)
        else:
            # Previously nothing happened here, with no hint as to why.
            st.warning(_NO_VECTORS_MESSAGE)

# The user wants questions (with answers) generated for a given topic.
elif input_method == "Get Ques from Doc":
    st.title("Let's Get Ques from Document")

    prompt2 = """Based on the topic of {topic},
    kindly provide a comprehensive list of all possible questions that could arise.
    For each question, provide detailed and explanatory answers in atleast 1000 words detail based on the context,
    ensuring that the responses are as informative as possible.
    make sure you strictly follow the {topic}"""
    topic = st.text_input("Enter a topic", placeholder="What is your topic")

    if topic:
        if "vectors" in st.session_state:
            # Substitute the topic into the template; the raw template used
            # to be sent with a literal "{topic}", ignoring the user's input.
            _answer_from_documents(prompt2.format(topic=topic))
        else:
            st.warning(_NO_VECTORS_MESSAGE)
132
+