# LinkWise — Streamlit app that loads article URLs, indexes them into FAISS,
# and answers questions with a Hugging Face hosted Mistral model.
# (The "Spaces: Sleeping" lines were Hugging Face Spaces page chrome captured
# by the copy/paste, not part of the program.)
| import os | |
| import streamlit as st | |
| import pickle | |
| import time | |
| from langchain.chains import RetrievalQA | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.document_loaders import UnstructuredURLLoader | |
| #from langchain.vectorstores import FAISS | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEndpoint | |
| from sentence_transformers import SentenceTransformer | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain import HuggingFaceHub | |
| from dotenv import load_dotenv | |
# Load secrets (HF_TOKEN_FOR_WEBSEARCH) from a local .env file.
load_dotenv()

# Hosted instruct model used to answer questions over the indexed articles.
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"

# HuggingFaceEndpoint (already imported above) replaces the deprecated
# HuggingFaceHub wrapper. Generation parameters are passed directly;
# "max_new_tokens" is the key the HF text-generation endpoint understands —
# the old model_kwargs={"max_tokens": ...} was silently ignored.
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    task="text-generation",
    huggingfacehub_api_token=os.getenv("HF_TOKEN_FOR_WEBSEARCH"),
    temperature=0.6,
    max_new_tokens=1000,
)
# Page chrome: main title plus a sidebar section that collects article links.
st.title("LinkWise 🔎")
st.sidebar.title("Article URLs")

# Streamlit reruns the whole script on every interaction, so the number of URL
# input boxes must persist in session state. Start with a single box.
if "url_count" not in st.session_state:
    st.session_state.url_count = 1
def add_url():
    """Button callback: grow the list of sidebar URL inputs by one slot."""
    st.session_state.url_count = st.session_state.url_count + 1
# Render one text box per requested slot and collect whatever the user typed
# (comprehension replaces the manual append loop; empty boxes yield "").
urls = [st.sidebar.text_input(f"URL {i+1}") for i in range(st.session_state.url_count)]

# The on_click callback runs before the rerun, so the extra box appears
# on the very next script execution.
st.sidebar.button("Add another URL", on_click=add_url)
process_url_clicked = st.sidebar.button("Submit URLs")

# The pickled FAISS index is persisted here so questions can be answered
# across reruns without re-processing the URLs.
file_path = "faiss_store_db.pkl"
# Single placeholder reused for progress messages and, later, the question box.
placeholder = st.empty()
if process_url_clicked:
    # Drop blank inputs so UnstructuredURLLoader isn't handed empty strings
    # (unused boxes return "" from st.sidebar.text_input).
    active_urls = [u for u in urls if u and u.strip()]

    # 1) Fetch the raw article text.
    placeholder.text("Data Loading started...")
    loader = UnstructuredURLLoader(urls=active_urls)
    data = loader.load()

    # 2) Chunk the documents. Separators fall back from paragraph to line to
    # sentence to clause boundaries. (The original listed '.' twice; ',' was
    # almost certainly the intended final fallback.)
    placeholder.text("Splitting of Data Started...")
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=600,
        chunk_overlap=100,
    )
    docs = text_splitter.split_documents(data)

    # 3) Embed and index the chunks. The status message now appears BEFORE the
    # (slow) embedding step — previously it was shown only after it finished.
    placeholder.text("Started Building Embedded Vector...")
    model_name = "sentence-transformers/all-mpnet-base-v2"
    hf_embeddings = HuggingFaceEmbeddings(model_name=model_name)
    vector_index = FAISS.from_documents(docs, hf_embeddings)

    # 4) Persist the index for later reruns.
    # NOTE(review): pickling a FAISS store is fragile across library versions;
    # FAISS.save_local/load_local is the supported persistence path — consider
    # migrating.
    with open(file_path, 'wb') as f:
        pickle.dump(vector_index, f)
# The shared placeholder switches from progress display to the question box.
query = placeholder.text_input("Question :")
submit = st.button("Submit")

if query:
    if os.path.exists(file_path):
        # SECURITY NOTE: pickle.load executes arbitrary code; acceptable only
        # because this file is produced locally by this same app above.
        with open(file_path, 'rb') as f:
            vector_index = pickle.load(f)

        retrieval_qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",  # 'stuff' concatenates retrieved chunks into one prompt
            retriever=vector_index.as_retriever(),
        )
        result = retrieval_qa({'query': query})
        text = result['result']

        # Mistral often echoes the prompt followed by "\nHelpful Answer:";
        # keep only the text after that marker when it is present.
        marker = "\nHelpful Answer:"
        start_index = text.find(marker)
        if start_index != -1:
            parsed_text = text[start_index + len(marker):].strip()
        else:
            # BUG FIX: parsed_text was previously undefined when the marker was
            # missing, raising NameError at st.write below. Fall back to the
            # full model output.
            parsed_text = text.strip()

        if query or submit:
            st.header("Answer :")
            st.write(parsed_text)