MahatirTusher commited on
Commit
1d2d1fa
·
verified ·
1 Parent(s): 791a001

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -1,19 +1,20 @@
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from langchain_community.document_loaders.url import UnstructuredURLLoader
4
- from langchain.embeddings import HuggingFaceEmbeddings # Local embeddings
 
5
  from langchain_community.vectorstores.faiss import FAISS
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  import os
8
  import time
9
- from langchain_groq import ChatGroq
10
  from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
11
 
12
- # Load environment variables (optional, not needed for embeddings)
13
  load_dotenv()
14
 
15
- # Hardcoded Groq API key
16
- GROQ_API_KEY = "gsk_********************************" # [REDACTED: a live API key was committed here — revoke this credential and load it from an environment variable instead]
17
 
18
  # Set Streamlit app title
19
  st.title("News Research Tool 📈")
@@ -27,28 +28,32 @@ for i in range(3):
27
 
28
  # Button to process URLs
29
  process_url_clicked = st.sidebar.button("Process URLs")
30
- faiss_index_path = "faiss_index"
31
 
32
  # Placeholder for main content
33
  main_placeholder = st.empty()
34
 
35
- # Initialize the Groq LLM
36
  llm = ChatGroq(
37
  api_key=GROQ_API_KEY,
38
- model="llama3-70b-8192"
39
  )
40
 
41
  def save_faiss_index(vectorstore, path):
 
42
  vectorstore.save_local(path)
43
 
44
  def load_faiss_index(path, embeddings):
 
45
  return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
46
 
47
  if process_url_clicked:
 
48
  loader = UnstructuredURLLoader(urls=urls)
49
  main_placeholder.text("Data Loading...Started...✅✅✅")
50
  data = loader.load()
51
 
 
52
  text_splitter = RecursiveCharacterTextSplitter(
53
  separators=['\n\n', '\n', '.', ','],
54
  chunk_size=1000
@@ -56,26 +61,29 @@ if process_url_clicked:
56
  main_placeholder.text("Text Splitter...Started...✅✅✅")
57
  docs = text_splitter.split_documents(data)
58
 
59
- # Use local sentence-transformers embeddings (no API token needed)
60
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
61
  vectorstore_openai = FAISS.from_documents(docs, embeddings)
62
  main_placeholder.text("Embedding Vector Started Building...✅✅✅")
63
  time.sleep(2)
64
 
 
65
  save_faiss_index(vectorstore_openai, faiss_index_path)
66
 
67
  # Get query from user input
68
  query = main_placeholder.text_input("Question: ")
69
  if query:
70
  if os.path.exists(faiss_index_path):
71
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
72
  vectorstore = load_faiss_index(faiss_index_path, embeddings)
73
  chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
74
  result = chain({"question": query}, return_only_outputs=True)
75
 
 
76
  st.header("Answer")
77
  st.write(result["answer"])
78
 
 
79
  sources = result.get("sources", "")
80
  if sources:
81
  st.subheader("Sources:")
 
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from langchain_community.document_loaders.url import UnstructuredURLLoader
4
+ from langchain_community.embeddings import HuggingFaceHubEmbeddings
5
+ from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
6
  from langchain_community.vectorstores.faiss import FAISS
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  import os
9
  import time
10
+ from langchain_groq import ChatGroq # Updated to use Groq's ChatGroq
11
  from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
12
 
13
+ # Load environment variables (still needed for Hugging Face token)
14
  load_dotenv()
15
 
16
+ # Hardcoded Groq API key (NOT RECOMMENDED for production)
17
+ GROQ_API_KEY = "your_groq_api_key_here" # Replace with your actual Groq API key
18
 
19
  # Set Streamlit app title
20
  st.title("News Research Tool 📈")
 
28
 
29
  # Button to process URLs
30
  process_url_clicked = st.sidebar.button("Process URLs")
31
+ faiss_index_path = "faiss_index"
32
 
33
  # Placeholder for main content
34
  main_placeholder = st.empty()
35
 
36
+ # Initialize the Groq LLM (replaced ChatTogether with ChatGroq)
37
  llm = ChatGroq(
38
  api_key=GROQ_API_KEY,
39
+ model="llama3-70b-8192" # Groq-supported model (you can also use "llama-3.1-8b-instant")
40
  )
41
 
42
  def save_faiss_index(vectorstore, path):
43
+ # Save FAISS index and documents separately
44
  vectorstore.save_local(path)
45
 
46
  def load_faiss_index(path, embeddings):
47
+ # Load FAISS index and recreate vectorstore
48
  return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
49
 
50
  if process_url_clicked:
51
+ # Load data from URLs
52
  loader = UnstructuredURLLoader(urls=urls)
53
  main_placeholder.text("Data Loading...Started...✅✅✅")
54
  data = loader.load()
55
 
56
+ # Split data into chunks
57
  text_splitter = RecursiveCharacterTextSplitter(
58
  separators=['\n\n', '\n', '.', ','],
59
  chunk_size=1000
 
61
  main_placeholder.text("Text Splitter...Started...✅✅✅")
62
  docs = text_splitter.split_documents(data)
63
 
64
+ # Create embeddings and save them to FAISS index
65
+ embeddings = HuggingFaceEndpointEmbeddings(huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
66
  vectorstore_openai = FAISS.from_documents(docs, embeddings)
67
  main_placeholder.text("Embedding Vector Started Building...✅✅✅")
68
  time.sleep(2)
69
 
70
+ # Save the FAISS index to disk
71
  save_faiss_index(vectorstore_openai, faiss_index_path)
72
 
73
  # Get query from user input
74
  query = main_placeholder.text_input("Question: ")
75
  if query:
76
  if os.path.exists(faiss_index_path):
77
+ embeddings = HuggingFaceHubEmbeddings(huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))
78
  vectorstore = load_faiss_index(faiss_index_path, embeddings)
79
  chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
80
  result = chain({"question": query}, return_only_outputs=True)
81
 
82
+ # Display the answer
83
  st.header("Answer")
84
  st.write(result["answer"])
85
 
86
+ # Display sources, if available
87
  sources = result.get("sources", "")
88
  if sources:
89
  st.subheader("Sources:")