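"""News Research Tool: a Streamlit app that ingests up to three news-article
URLs, splits the pages into chunks, embeds them into a local FAISS index, and
answers questions over that index with source attribution.

Run locally with `streamlit run app.py` (filename assumed). Expects a .env
file providing the API keys read via os.getenv below.
"""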
import os
import time

import streamlit as st
from dotenv import load_dotenv
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.url import UnstructuredURLLoader
from langchain_community.vectorstores.faiss import FAISS
from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
from langchain_together import ChatTogether
# Load environment variables from .env
load_dotenv()

# Set up the Streamlit page
st.title("News Research Tool 📈")
st.sidebar.title("News Article URLs")
| # Get URLs from user input | |
| urls = [] | |
| for i in range(3): | |
| url = st.sidebar.text_input(f"URL {i+1}") | |
| urls.append(url) | |
| # Button to process URLs | |
| process_url_clicked = st.sidebar.button("Process URLs") | |
| faiss_index_path = "faiss_index" | |
| # Placeholder for main content | |
| main_placeholder = st.empty() | |
# Initialize the Together AI chat LLM
llm = ChatTogether(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    api_key=os.getenv("TOGETHER_API_KEY"),
)
def save_faiss_index(vectorstore, path):
    # Persist the FAISS index and its document store to disk
    vectorstore.save_local(path)


def load_faiss_index(path, embeddings):
    # Load the saved FAISS index and recreate the vectorstore
    return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
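# Why allow_dangerous_deserialization=True: save_local writes index.faiss (the
# raw vector index) plus index.pkl (a pickled docstore and id map), and
# load_local refuses to unpickle index.pkl without this explicit opt-in. It is
# safe here because the app only reloads an index it wrote itself.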
if process_url_clicked:
    # Load data from the non-empty URLs (blank sidebar fields would
    # otherwise be passed to the loader)
    loader = UnstructuredURLLoader(urls=[u for u in urls if u.strip()])
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = loader.load()

    # Split data into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000
    )
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)
    # Create embeddings and index the chunks in FAISS
    embeddings = HuggingFaceEndpointEmbeddings(
        huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
    )
    vectorstore = FAISS.from_documents(docs, embeddings)
    main_placeholder.text("Embedding Vector Started Building...✅✅✅")
    time.sleep(2)

    # Save the FAISS index to disk
    save_faiss_index(vectorstore, faiss_index_path)
# Get query from user input
query = main_placeholder.text_input("Question: ")
if query:
    if os.path.exists(faiss_index_path):
        embeddings = HuggingFaceEndpointEmbeddings(
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
        )
        vectorstore = load_faiss_index(faiss_index_path, embeddings)

        # Retrieve relevant chunks and answer with source attribution
        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
        result = chain.invoke({"question": query}, return_only_outputs=True)

        # Display the answer
        st.header("Answer")
        st.write(result["answer"])

        # Display sources, if available
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            for source in sources.split("\n"):
                st.write(source)
    else:
        st.warning("No saved index found. Process URLs first.")
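# A minimal .env sketch for local runs (variable names taken from the
# os.getenv calls above; the values are placeholders, not real keys):
#
#   TOGETHER_API_KEY=<your Together AI key>
#   HUGGINGFACEHUB_API_TOKEN=<your Hugging Face token>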