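"""News Research Tool: a Streamlit app that ingests up to three news-article
URLs, splits the pages into chunks, embeds them into a local FAISS index, and
answers questions over that index with source attribution.

Run locally with `streamlit run app.py` (filename assumed). Expects a .env
file providing the API keys read via os.getenv below.
"""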
import os
import time

import streamlit as st
from dotenv import load_dotenv
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.url import UnstructuredURLLoader
from langchain_community.vectorstores.faiss import FAISS
from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
from langchain_together import ChatTogether
# Load environment variables from .env
load_dotenv()

# Set up the Streamlit page
st.title("News Research Tool 📈")
st.sidebar.title("News Article URLs")
| # Get URLs from user input | |
| urls = [] | |
| for i in range(3): | |
| url = st.sidebar.text_input(f"URL {i+1}") | |
| urls.append(url) | |
| # Button to process URLs | |
| process_url_clicked = st.sidebar.button("Process URLs") | |
| faiss_index_path = "faiss_index" | |
| # Placeholder for main content | |
| main_placeholder = st.empty() | |
# Initialize the Together AI chat LLM
llm = ChatTogether(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    api_key=os.getenv("TOGETHER_API_KEY"),
)
def save_faiss_index(vectorstore, path):
    # Persist the FAISS index and its document store to disk
    vectorstore.save_local(path)


def load_faiss_index(path, embeddings):
    # Load the saved FAISS index and recreate the vectorstore
    return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
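# Why allow_dangerous_deserialization=True: save_local writes index.faiss (the
# raw vector index) plus index.pkl (a pickled docstore and id map), and
# load_local refuses to unpickle index.pkl without this explicit opt-in. It is
# safe here because the app only reloads an index it wrote itself.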
if process_url_clicked:
    # Load data from the non-empty URLs (blank sidebar fields would
    # otherwise be passed to the loader)
    loader = UnstructuredURLLoader(urls=[u for u in urls if u.strip()])
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = loader.load()

    # Split data into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000
    )
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)
    # Create embeddings and index the chunks in FAISS
    embeddings = HuggingFaceEndpointEmbeddings(
        huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
    )
    vectorstore = FAISS.from_documents(docs, embeddings)
    main_placeholder.text("Embedding Vector Started Building...✅✅✅")
    time.sleep(2)

    # Save the FAISS index to disk
    save_faiss_index(vectorstore, faiss_index_path)
# Get query from user input
query = main_placeholder.text_input("Question: ")
if query:
    if os.path.exists(faiss_index_path):
        embeddings = HuggingFaceEndpointEmbeddings(
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
        )
        vectorstore = load_faiss_index(faiss_index_path, embeddings)

        # Retrieve relevant chunks and answer with source attribution
        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
        result = chain.invoke({"question": query}, return_only_outputs=True)

        # Display the answer
        st.header("Answer")
        st.write(result["answer"])

        # Display sources, if available
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            for source in sources.split("\n"):
                st.write(source)
    else:
        st.warning("No saved index found. Process URLs first.")
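# A minimal .env sketch for local runs (variable names taken from the
# os.getenv calls above; the values are placeholders, not real keys):
#
#   TOGETHER_API_KEY=<your Together AI key>
#   HUGGINGFACEHUB_API_TOKEN=<your Hugging Face token>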