import os
import streamlit as st
import pickle
import pinecone
import time
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import FAISS
from langchain.vectorstores import Pinecone
from dotenv import load_dotenv

load_dotenv()  # take environment variables from .env (especially the OpenAI API key)
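# Assumed .env contents (the variable names come from the os.getenv() calls below;
# the values are placeholders):
#   OPENAI_API_KEY=sk-...
#   PINECONE_API_KEY=...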
| st.title("Research Tool π") | |
| st.sidebar.title("Article URLs") | |
| urls = [] | |
| for i in range(3): | |
| url = st.sidebar.text_input(f"URL {i+1}") | |
| urls.append(url) | |
| main_placeholder = st.empty() | |
| query = main_placeholder.text_input("Question: ") | |
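# Everything below runs only once a question has been entered: the URLs are
# loaded, split into chunks, embedded, indexed in Pinecone, and then queried.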
if query:
    loader = UnstructuredURLLoader(urls=urls)
    main_placeholder.text("Data Loading...Started...")
    data = loader.load()

    # split data
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000
    )
    main_placeholder.text("Text Splitter...Started...")
    docs = text_splitter.split_documents(data)
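    # RecursiveCharacterTextSplitter tries the separators in the given order
    # ('\n\n', then '\n', '.', ','), falling back to the next one until each
    # chunk fits within chunk_size (1000 characters here).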
    # create embeddings and save them to a Pinecone index
    embeddings = OpenAIEmbeddings(api_key=os.getenv('OPENAI_API_KEY'))
    pinecone.init(
        api_key=os.getenv('PINECONE_API_KEY'),
        environment="gcp-starter"
    )
    index_name = "langchainvector"
    index = Pinecone.from_documents(docs, embeddings, index_name=index_name)
    main_placeholder.text("Embedding Vector Started Building...")
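    # Pinecone.from_documents embeds each chunk and upserts the vectors into the
    # "langchainvector" index. Assumption: that index already exists in the
    # gcp-starter environment with a dimension matching the OpenAI embedding
    # model (1536 for text-embedding-ada-002).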
    def retrieve_query(mquery, k=3):
        matching_results = index.similarity_search(mquery, k=k)
        return matching_results
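    # similarity_search embeds the query text and returns the k most similar
    # chunks from the index as LangChain Document objects.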
    llm = OpenAI(temperature=0.5)
    chain = load_qa_chain(llm, chain_type="stuff")
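    # chain_type="stuff" concatenates ("stuffs") all retrieved chunks into a
    # single prompt, so the combined chunks must fit in the model's context window.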
    def retrieve_ans(mquery):
        doc_search = retrieve_query(mquery)
        print(doc_search)  # debug: log the retrieved chunks to the console
        response = chain.run(input_documents=doc_search, question=mquery)
        return response

    result = retrieve_ans(query)
| st.header("Answer") | |
| st.write(result) | |
| # Display sources, if available | |
| # sources = result.get("sources", "") | |
| # if sources: | |
| # st.subheader("Sources:") | |
| # sources_list = sources.split("\n") # Split the sources by newline | |
| # for source in sources_list: | |
| # st.write(source) | |
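    # A minimal sketch (kept commented out) of how the already-imported
    # RetrievalQAWithSourcesChain could return sources alongside the answer.
    # Assumptions: the vectorstore is exposed as a retriever via .as_retriever(),
    # and the chain is called with a {"question": ...} dict.
    # sources_chain = RetrievalQAWithSourcesChain.from_llm(
    #     llm=llm, retriever=index.as_retriever()
    # )
    # sources_result = sources_chain({"question": query}, return_only_outputs=True)
    # st.write(sources_result["answer"])
    # if sources_result.get("sources"):
    #     st.subheader("Sources:")
    #     for source in sources_result["sources"].split("\n"):
    #         st.write(source)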