Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,19 +1,20 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
from langchain_community.document_loaders.url import UnstructuredURLLoader
|
| 4 |
-
from
|
|
|
|
| 5 |
from langchain_community.vectorstores.faiss import FAISS
|
| 6 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 7 |
import os
|
| 8 |
import time
|
| 9 |
-
from langchain_groq import ChatGroq
|
| 10 |
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
|
| 11 |
|
| 12 |
-
# Load environment variables (
|
| 13 |
load_dotenv()
|
| 14 |
|
| 15 |
-
# Hardcoded Groq API key
|
| 16 |
-
GROQ_API_KEY = "
|
| 17 |
|
| 18 |
# Set Streamlit app title
|
| 19 |
st.title("News Research Tool π")
|
|
@@ -27,28 +28,32 @@ for i in range(3):
|
|
| 27 |
|
| 28 |
# Button to process URLs
|
| 29 |
process_url_clicked = st.sidebar.button("Process URLs")
|
| 30 |
-
|
| 31 |
|
| 32 |
# Placeholder for main content
|
| 33 |
main_placeholder = st.empty()
|
| 34 |
|
| 35 |
-
# Initialize the Groq LLM
|
| 36 |
llm = ChatGroq(
|
| 37 |
api_key=GROQ_API_KEY,
|
| 38 |
-
model="llama3-70b-8192"
|
| 39 |
)
|
| 40 |
|
| 41 |
def save_faiss_index(vectorstore, path):
|
|
|
|
| 42 |
vectorstore.save_local(path)
|
| 43 |
|
| 44 |
def load_faiss_index(path, embeddings):
|
|
|
|
| 45 |
return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
|
| 46 |
|
| 47 |
if process_url_clicked:
|
|
|
|
| 48 |
loader = UnstructuredURLLoader(urls=urls)
|
| 49 |
main_placeholder.text("Data Loading...Started...β
β
β
")
|
| 50 |
data = loader.load()
|
| 51 |
|
|
|
|
| 52 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 53 |
separators=['\n\n', '\n', '.', ','],
|
| 54 |
chunk_size=1000
|
|
@@ -56,26 +61,29 @@ if process_url_clicked:
|
|
| 56 |
main_placeholder.text("Text Splitter...Started...β
β
β
")
|
| 57 |
docs = text_splitter.split_documents(data)
|
| 58 |
|
| 59 |
-
#
|
| 60 |
-
embeddings =
|
| 61 |
vectorstore_openai = FAISS.from_documents(docs, embeddings)
|
| 62 |
main_placeholder.text("Embedding Vector Started Building...β
β
β
")
|
| 63 |
time.sleep(2)
|
| 64 |
|
|
|
|
| 65 |
save_faiss_index(vectorstore_openai, faiss_index_path)
|
| 66 |
|
| 67 |
# Get query from user input
|
| 68 |
query = main_placeholder.text_input("Question: ")
|
| 69 |
if query:
|
| 70 |
if os.path.exists(faiss_index_path):
|
| 71 |
-
embeddings =
|
| 72 |
vectorstore = load_faiss_index(faiss_index_path, embeddings)
|
| 73 |
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
|
| 74 |
result = chain({"question": query}, return_only_outputs=True)
|
| 75 |
|
|
|
|
| 76 |
st.header("Answer")
|
| 77 |
st.write(result["answer"])
|
| 78 |
|
|
|
|
| 79 |
sources = result.get("sources", "")
|
| 80 |
if sources:
|
| 81 |
st.subheader("Sources:")
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
from langchain_community.document_loaders.url import UnstructuredURLLoader
|
| 4 |
+
from langchain_community.embeddings import HuggingFaceHubEmbeddings
|
| 5 |
+
from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
|
| 6 |
from langchain_community.vectorstores.faiss import FAISS
|
| 7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 8 |
import os
|
| 9 |
import time
|
| 10 |
+
from langchain_groq import ChatGroq # Updated to use Groq's ChatGroq
|
| 11 |
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
|
| 12 |
|
| 13 |
+
# Load environment variables (still needed for Hugging Face token)
|
| 14 |
load_dotenv()
|
| 15 |
|
| 16 |
+
# Groq API key. Read from the GROQ_API_KEY environment variable (which
# load_dotenv() above can populate from a .env file) so the secret is not
# committed to the repository. Falls back to the original placeholder when the
# variable is unset or empty, so existing behavior is preserved.
GROQ_API_KEY = os.getenv("GROQ_API_KEY") or "your_groq_api_key_here"
|
| 18 |
|
| 19 |
# Set Streamlit app title
|
| 20 |
st.title("News Research Tool 📈")
|
|
|
|
| 28 |
|
| 29 |
# Button to process URLs
|
| 30 |
process_url_clicked = st.sidebar.button("Process URLs")
|
| 31 |
+
faiss_index_path = "faiss_index"
|
| 32 |
|
| 33 |
# Placeholder for main content
|
| 34 |
main_placeholder = st.empty()
|
| 35 |
|
| 36 |
+
# Initialize the Groq chat model (replaced ChatTogether with ChatGroq).
# NOTE(review): confirm "llama3-70b-8192" is still served by Groq before
# deploying; "llama-3.1-8b-instant" is the alternative named by the author.
llm = ChatGroq(
    api_key=GROQ_API_KEY,
    model="llama3-70b-8192",
)
|
| 41 |
|
| 42 |
def save_faiss_index(vectorstore, path):
    """Persist a vector store to a local folder.

    Args:
        vectorstore: Object exposing ``save_local(path)``; in this app it is
            the store returned by ``FAISS.from_documents``.
        path: Destination folder, forwarded unchanged to ``save_local``.
    """
    vectorstore.save_local(path)
|
| 45 |
|
| 46 |
def load_faiss_index(path, embeddings):
    """Load a FAISS vector store from a local folder.

    Args:
        path: Folder previously written by ``save_local``.
        embeddings: Embeddings object forwarded to ``FAISS.load_local``.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    # SECURITY: allow_dangerous_deserialization=True opts in to loading
    # pickled data from `path`. Only point this at folders this app wrote
    # itself; never at uploaded or otherwise untrusted index folders.
    return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
|
| 49 |
|
| 50 |
if process_url_clicked:
|
| 51 |
+
# Load data from URLs
|
| 52 |
loader = UnstructuredURLLoader(urls=urls)
|
| 53 |
main_placeholder.text("Data Loading...Started...✅✅✅")
|
| 54 |
data = loader.load()
|
| 55 |
|
| 56 |
+
# Split data into chunks
|
| 57 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 58 |
separators=['\n\n', '\n', '.', ','],
|
| 59 |
chunk_size=1000
|
|
|
|
| 61 |
main_placeholder.text("Text Splitter...Started...✅✅✅")
|
| 62 |
docs = text_splitter.split_documents(data)
|
| 63 |
|
| 64 |
+
# Create embeddings and save them to FAISS index
|
| 65 |
+
embeddings = HuggingFaceEndpointEmbeddings(huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
|
| 66 |
vectorstore_openai = FAISS.from_documents(docs, embeddings)
|
| 67 |
main_placeholder.text("Embedding Vector Started Building...✅✅✅")
|
| 68 |
time.sleep(2)
|
| 69 |
|
| 70 |
+
# Save the FAISS index to disk
|
| 71 |
save_faiss_index(vectorstore_openai, faiss_index_path)
|
| 72 |
|
| 73 |
# Get query from user input
|
| 74 |
query = main_placeholder.text_input("Question: ")
|
| 75 |
if query:
|
| 76 |
if os.path.exists(faiss_index_path):
|
| 77 |
+
# Use the same embeddings class the index was built with, so query vectors
# are produced the same way as the stored document vectors.
embeddings = HuggingFaceEndpointEmbeddings(huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
|
| 78 |
vectorstore = load_faiss_index(faiss_index_path, embeddings)
|
| 79 |
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
|
| 80 |
# Chain.__call__ is deprecated in LangChain; invoke() is the supported entry point.
result = chain.invoke({"question": query}, return_only_outputs=True)
|
| 81 |
|
| 82 |
+
# Display the answer
|
| 83 |
st.header("Answer")
|
| 84 |
st.write(result["answer"])
|
| 85 |
|
| 86 |
+
# Display sources, if available
|
| 87 |
sources = result.get("sources", "")
|
| 88 |
if sources:
|
| 89 |
st.subheader("Sources:")
|