Spaces:

MahatirTusher
/

LazyAss-AI-Reader

Sleeping

App Files Files Community

MahatirTusher committed on Apr 22, 2025

Commit

1d2d1fa

verified ·

1 Parent(s): 791a001

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -11

app.py CHANGED Viewed

@@ -1,19 +1,20 @@
 import streamlit as st
 from dotenv import load_dotenv
 from langchain_community.document_loaders.url import UnstructuredURLLoader
-from langchain.embeddings import HuggingFaceEmbeddings  # Local embeddings
 from langchain_community.vectorstores.faiss import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import os
 import time
-from langchain_groq import ChatGroq
 from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
-# Load environment variables (optional, not needed for embeddings)
 load_dotenv()
-# Hardcoded Groq API key
-GROQ_API_KEY = "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"
 # Set Streamlit app title
 st.title("News Research Tool 📈")
@@ -27,28 +28,32 @@ for i in range(3):
 # Button to process URLs
 process_url_clicked = st.sidebar.button("Process URLs")
-faiss Mok_index_path = "faiss_index"
 # Placeholder for main content
 main_placeholder = st.empty()
-# Initialize the Groq LLM
 llm = ChatGroq(
     api_key=GROQ_API_KEY,
-    model="llama3-70b-8192"
 )
 def save_faiss_index(vectorstore, path):
     vectorstore.save_local(path)
 def load_faiss_index(path, embeddings):
     return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
 if process_url_clicked:
     loader = UnstructuredURLLoader(urls=urls)
     main_placeholder.text("Data Loading...Started...✅✅✅")
     data = loader.load()
     text_splitter = RecursiveCharacterTextSplitter(
         separators=['\n\n', '\n', '.', ','],
         chunk_size=1000
@@ -56,26 +61,29 @@ if process_url_clicked:
     main_placeholder.text("Text Splitter...Started...✅✅✅")
     docs = text_splitter.split_documents(data)
-    # Use local sentence-transformers embeddings (no API token needed)
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vectorstore_openai = FAISS.from_documents(docs, embeddings)
     main_placeholder.text("Embedding Vector Started Building...✅✅✅")
     time.sleep(2)
     save_faiss_index(vectorstore_openai, faiss_index_path)
 # Get query from user input
 query = main_placeholder.text_input("Question: ")
 if query:
     if os.path.exists(faiss_index_path):
-        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
         vectorstore = load_faiss_index(faiss_index_path, embeddings)
         chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
         result = chain({"question": query}, return_only_outputs=True)
         st.header("Answer")
         st.write(result["answer"])
         sources = result.get("sources", "")
         if sources:
             st.subheader("Sources:")

 import streamlit as st
 from dotenv import load_dotenv
 from langchain_community.document_loaders.url import UnstructuredURLLoader
+from langchain_community.embeddings import HuggingFaceHubEmbeddings
+from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
 from langchain_community.vectorstores.faiss import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import os
 import time
+from langchain_groq import ChatGroq  # Updated to use Groq's ChatGroq
 from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
+# Load environment variables (still needed for Hugging Face token)
 load_dotenv()
+# Hardcoded Groq API key (NOT RECOMMENDED for production)
+GROQ_API_KEY = "your_groq_api_key_here"  # Replace with your actual Groq API key
 # Set Streamlit app title
 st.title("News Research Tool 📈")
 # Button to process URLs
 process_url_clicked = st.sidebar.button("Process URLs")
+faiss_index_path = "faiss_index"
 # Placeholder for main content
 main_placeholder = st.empty()
+# Initialize the Groq LLM (replaced ChatTogether with ChatGroq)
 llm = ChatGroq(
     api_key=GROQ_API_KEY,
+    model="llama3-70b-8192"  # Groq-supported model (you can also use "llama-3.1-8b-instant")
 )
 def save_faiss_index(vectorstore, path):
+    # Save FAISS index and documents separately
     vectorstore.save_local(path)
 def load_faiss_index(path, embeddings):
+    # Load FAISS index and recreate vectorstore
     return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
 if process_url_clicked:
+    # Load data from URLs
     loader = UnstructuredURLLoader(urls=urls)
     main_placeholder.text("Data Loading...Started...✅✅✅")
     data = loader.load()
+    # Split data into chunks
     text_splitter = RecursiveCharacterTextSplitter(
         separators=['\n\n', '\n', '.', ','],
         chunk_size=1000
     main_placeholder.text("Text Splitter...Started...✅✅✅")
     docs = text_splitter.split_documents(data)
+    # Create embeddings and save them to FAISS index
+    embeddings = HuggingFaceEndpointEmbeddings(huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
     vectorstore_openai = FAISS.from_documents(docs, embeddings)
     main_placeholder.text("Embedding Vector Started Building...✅✅✅")
     time.sleep(2)
+    # Save the FAISS index to disk
     save_faiss_index(vectorstore_openai, faiss_index_path)
 # Get query from user input
 query = main_placeholder.text_input("Question: ")
 if query:
     if os.path.exists(faiss_index_path):
+        embeddings = HuggingFaceHubEmbeddings(huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
         vectorstore = load_faiss_index(faiss_index_path, embeddings)
         chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
         result = chain({"question": query}, return_only_outputs=True)
+        # Display the answer
         st.header("Answer")
         st.write(result["answer"])
+        # Display sources, if available
         sources = result.get("sources", "")
         if sources:
             st.subheader("Sources:")