| import streamlit as st | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.llms import HuggingFaceHub | |
| from langchain.chains import RetrievalQA | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from pdfminer.high_level import extract_text | |
def get_pdf_text(files):
    """Extract and concatenate the plain text of uploaded PDF files.

    Args:
        files: iterable of binary file-like objects (e.g. Streamlit
            ``UploadedFile``) readable by pdfminer's ``extract_text``.

    Returns:
        str: the text of all PDFs joined in upload order, with
        newlines collapsed to single spaces.
    """
    parts = []
    for file in files:
        # extract_text accepts a binary file-like object directly.
        text = extract_text(file)
        parts.append(text.replace("\n", " "))
    # join() keeps upload order and is O(n); the original did
    # `full_text = text + full_text`, which both reversed the file
    # order and rebuilt the string on every iteration.
    return "".join(parts)
# --- Streamlit page: build a FAISS index from uploaded PDFs, then chat ---
st.title("Embedding Creation for Langchain")

st.header("File Upload")
files = st.file_uploader("Upload your files", accept_multiple_files=True, type="pdf")

if files:
    st.header("Start Conversion")
    if st.button("Ready!"):
        with st.spinner("Creating chain..."):
            full_text = get_pdf_text(files)
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
            chunks = text_splitter.split_text(full_text)
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            # Persist across reruns: Streamlit re-executes the whole script on
            # every widget interaction, and st.button is only True during the
            # click's run — without session_state the vector store would be
            # gone by the time the user types a question.
            st.session_state["db"] = FAISS.from_texts(chunks, embeddings)
            st.session_state["memory"] = ConversationBufferMemory(
                memory_key='chat_history', return_messages=True,
            )

# Render the chatbot whenever an index exists, not only inside the
# button-click run (the original nested this under the button, so the
# question box vanished on the next rerun and the only search ever made
# used an empty query).
if "db" in st.session_state:
    st.header("Chatbot")
    st.subheader("Ask a question")
    question = st.text_input("Question")
    if question:  # skip the empty query produced on first render
        similar_response = st.session_state["db"].similarity_search(query=question, k=3)
        # LangChain Document exposes `.page_content`; the original used
        # `.page_contents`, which raises AttributeError at runtime.
        page_contents = " ".join(doc.page_content for doc in similar_response)
        st.write(page_contents)