adflkajf committed on
Commit
c9dfd05
·
1 Parent(s): 5f27552

Upload 3 files

Browse files
XYZ_contract_pdf_Sumit Yenugwar.pdf ADDED
Binary file (265 kB). View file
 
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chains.question_answering import load_qa_chain
8
+ from langchain.llms import OpenAI
9
+ import os
10
+
11
+
12
+
13
# API keys are read from the environment; never hard-code secrets in source.
# NOTE(review): the original commit embedded live-looking OpenAI and SerpAPI
# keys directly in this file — those keys are exposed in VCS history and must
# be revoked and rotated immediately.
for _required_key in ("OPENAI_API_KEY", "SERPAPI_API_KEY"):
    if not os.environ.get(_required_key):
        # Warn instead of raising so the Streamlit page still renders and
        # shows the operator what is missing.
        print(
            f"WARNING: environment variable {_required_key} is not set; "
            "OpenAI/SerpAPI calls below will fail until it is exported."
        )
16
+
17
+
18
# PDF processing: extract the raw text of every page of the contract PDF.
pdfreader = PdfReader('XYZ_contract_pdf_Sumit Yenugwar.pdf')
# Build the text with a single join instead of repeated `raw_text += content`
# (quadratic string concatenation). Pages with no extractable text — e.g.
# scanned-image pages — return a falsy value and are skipped, matching the
# original `if content:` guard.
raw_text = "".join(
    page_text
    for page in pdfreader.pages
    if (page_text := page.extract_text())
)
25
+
26
# Text splitting: break the extracted contract text into overlapping chunks
# so each chunk fits comfortably inside the model's context window.
_SPLITTER_CONFIG = {
    "separator": "\n",        # split on newlines first
    "chunk_size": 4000,       # max characters per chunk
    "chunk_overlap": 1000,    # overlap preserves context across boundaries
    "length_function": len,   # measure size in characters
}
text_splitter = CharacterTextSplitter(**_SPLITTER_CONFIG)
texts = text_splitter.split_text(raw_text)
34
+
35
# Embeddings are produced by OpenAI's embedding endpoint.
embeddings = OpenAIEmbeddings()

# Build a FAISS vector index over the contract chunks.
document_search = FAISS.from_texts(texts, embeddings)

# Persist the index to disk, then load it back. The reloaded store
# (new_document_search) is the one the app actually queries, which
# demonstrates the index can be rebuilt from disk without re-embedding.
_INDEX_DIR = "faiss_index"
document_search.save_local(_INDEX_DIR)
new_document_search = FAISS.load_local(_INDEX_DIR, embeddings)
47
+
48
+
49
+ ##################################
50
+
51
# QA chain: "stuff" packs all retrieved documents into one LLM prompt.
llm = OpenAI()
chain = load_qa_chain(llm, chain_type="stuff")
53
+
54
# Sidebar: static "about" panel describing the app's stack.
with st.sidebar:
    st.title('🤗💬 LLM Chat APP')
    _about_md = '''
    ## About
    This app is an LLM-powered chatbot built using:
    - [Streamlit](https://streamlit.io/)
    - [LangChain](https://python.langchain.com/)
    - [OpenAI](https://platform.openai.com/docs/models) LLM model

    '''
    st.markdown(_about_md)
    # Vertical spacer between the description and the credit line.
    st.markdown("<br>", unsafe_allow_html=True)
    st.write('Made with ❤️ by [Prompt Engineer](https://www.youtube.com/watch?v=M4mc-z_K1NU&list=PLUTApKyNO6MwrOioHGaFCeXtZpchoGv6W)')
66
+
67
# Streamlit app
def main():
    """Streamlit entry point: accept an MCQ question, retrieve the most
    relevant contract chunks from the FAISS index, and answer with the
    "stuff" QA chain."""
    st.title("DAMA-Data Management body of knowledge")

    # Text input area
    user_input = st.text_area("Enter your MCQ question ", height=150)

    # Button to trigger model inference
    if st.button("Get Answer"):
        # Guard: nothing to retrieve or answer for an empty question.
        if not user_input.strip():
            st.warning("Please enter a question first.")
            return

        # BUGFIX: the original built `prompt_query + user_input`, which
        # appended the question a second time (it is already interpolated
        # into the f-string) and bloated the retrieval query.
        text_query = f"you have provided with MCQ question and its option as a chatbot model: {user_input}"

        # Perform similarity search against the persisted index.
        docs = new_document_search.similarity_search(text_query)

        # Answer using only the retrieved documents as context.
        model_answer = chain.run(input_documents=docs, question=user_input)

        # Display the model's answer
        st.text_area("Model Answer:", value=model_answer)


# Run the Streamlit app
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
streamlit==1.29.0
PyPDF2==3.0.1
langchain==0.0.345 # Replace with the correct version if available
openai==1.3.7 # Replace with the correct version if available
numpy==1.26.2
tiktoken==0.5.2
faiss-cpu # required: app.py uses langchain's FAISS vector store, which imports the faiss package