Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- XYZ_contract_pdf_Sumit Yenugwar.pdf +0 -0
- app.py +91 -0
- requirements.txt +6 -0
XYZ_contract_pdf_Sumit Yenugwar.pdf
ADDED
|
Binary file (265 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from PyPDF2 import PdfReader
|
| 3 |
+
|
| 4 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 5 |
+
from langchain.text_splitter import CharacterTextSplitter
|
| 6 |
+
from langchain.vectorstores import FAISS
|
| 7 |
+
from langchain.chains.question_answering import load_qa_chain
|
| 8 |
+
from langchain.llms import OpenAI
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# --- API keys ---------------------------------------------------------------
# SECURITY FIX: the original file hard-coded live OpenAI and SerpAPI keys in
# source. Committed secrets must be treated as compromised and revoked; the
# key is now read from the environment (e.g. Streamlit/Spaces secrets).
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY environment variable is not set. "
        "Configure it (e.g. via Streamlit secrets) before starting the app."
    )
# NOTE(review): SERPAPI_API_KEY was also set here, but nothing in this file
# calls SerpAPI, so it is not required and is no longer configured.

# --- PDF ingestion ----------------------------------------------------------
# Read the bundled contract PDF and concatenate the text of every page.
pdfreader = PdfReader('XYZ_contract_pdf_Sumit Yenugwar.pdf')
# extract_text() can return None for image-only pages; treat that as ''.
# ''.join over a generator avoids the quadratic repeated-+= string build.
raw_text = ''.join(page.extract_text() or '' for page in pdfreader.pages)

# --- Chunking ---------------------------------------------------------------
# Split the raw text into overlapping chunks small enough to embed.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=4000,
    chunk_overlap=1000,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

# --- Vector store -----------------------------------------------------------
# Embed each chunk with OpenAI embeddings and index them in FAISS.
embeddings = OpenAIEmbeddings()
document_search = FAISS.from_texts(texts, embeddings)

# Persist the index to disk, then reload it; the reloaded store is the one
# the app actually queries (also verifies the index round-trips correctly).
document_search.save_local("faiss_index")
new_document_search = FAISS.load_local("faiss_index", embeddings)

# --- QA chain ---------------------------------------------------------------
# "stuff" chain: all retrieved documents are stuffed into a single prompt.
chain = load_qa_chain(OpenAI(), chain_type="stuff")
| 54 |
+
# Static "About" panel rendered in the Streamlit sidebar, alongside the chat UI.
with st.sidebar:
    st.title('🤗💬 LLM Chat APP')
    # Markdown body listing the stack the app is built on.
    st.markdown('''
    ## About
    This app is an LLM-powered chatbot built using:
    - [Streamlit](https://streamlit.io/)
    - [LangChain](https://python.langchain.com/)
    - [OpenAI](https://platform.openai.com/docs/models) LLM model

    ''')
    st.markdown("<br>", unsafe_allow_html=True)  # Add vertical space
    st.write('Made with ❤️ by [Prompt Engineer](https://www.youtube.com/watch?v=M4mc-z_K1NU&list=PLUTApKyNO6MwrOioHGaFCeXtZpchoGv6W)')
# Streamlit app
def main():
    """Render the QA page.

    Takes an MCQ question from the user, retrieves the most similar contract
    chunks from the module-level FAISS index (``new_document_search``), and
    answers with the module-level QA ``chain``.
    """
    st.title("DAMA-Data Management body of knowledge")

    # Text input area
    user_input = st.text_area("Enter your MCQ question ", height=150)

    # Button to trigger model inference
    if st.button("Get Answer"):
        # Guard: an empty question would waste a retrieval + LLM call.
        if not user_input.strip():
            st.warning("Please enter a question first.")
            return

        # Instruction prefix for retrieval. BUG FIX: the original computed
        # ``text_query = prompt_query + user_input`` even though prompt_query
        # already embeds user_input via the f-string, so the question was sent
        # to the retriever twice; the duplication is removed.
        prompt_query = f"you have provided with MCQ question and its option as a chatbot model: {user_input}"

        # Retrieve the chunks most similar to the prompted question.
        docs = new_document_search.similarity_search(prompt_query)

        # Answer the raw question using only the retrieved context.
        model_answer = chain.run(input_documents=docs, question=user_input)

        # Display the model's answer
        st.text_area("Model Answer:", value=model_answer)

# Run the Streamlit app
if __name__ == "__main__":
    main()
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit==1.29.0
|
| 2 |
+
PyPDF2==3.0.1
|
| 3 |
+
langchain==0.0.345
faiss-cpu==1.7.4  # required by langchain.vectorstores.FAISS; missing pin likely caused the runtime error
|
| 4 |
+
openai==1.3.7 # Replace with the correct version if available
|
| 5 |
+
numpy==1.26.2
|
| 6 |
+
tiktoken==0.5.2
|