MahatirTusher committed on
Commit
aa8ba53
·
verified ·
1 Parent(s): 70ccb58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -42
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from langchain_community.document_loaders.url import UnstructuredURLLoader
4
- from langchain.embeddings import HuggingFaceEmbeddings # Local embeddings
5
  from langchain_community.vectorstores.faiss import FAISS
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  import os
@@ -9,7 +9,7 @@ import time
9
  from langchain_groq import ChatGroq
10
  from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
11
 
12
- # Load environment variables (optional, not needed for hardcoded key)
13
  load_dotenv()
14
 
15
  # Hardcoded Groq API key (NOT RECOMMENDED for production)
@@ -19,11 +19,16 @@ GROQ_API_KEY = "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"
19
  st.title("News Research Tool 📈")
20
  st.sidebar.title("News Article URLs")
21
 
 
 
 
 
22
  # Get URLs from user input
23
  urls = []
24
  for i in range(3):
25
  url = st.sidebar.text_input(f"URL {i+1}")
26
- urls.append(url)
 
27
 
28
  # Button to process URLs
29
  process_url_clicked = st.sidebar.button("Process URLs")
@@ -45,45 +50,68 @@ def load_faiss_index(path, embeddings):
45
  return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
46
 
47
  if process_url_clicked:
48
- try:
49
- loader = UnstructuredURLLoader(urls=urls)
50
- main_placeholder.text("Data Loading...Started...✅✅✅")
51
- data = loader.load()
52
-
53
- text_splitter = RecursiveCharacterTextSplitter(
54
- separators=['\n\n', '\n', '.', ','],
55
- chunk_size=1000
56
- )
57
- main_placeholder.text("Text Splitter...Started...✅✅✅")
58
- docs = text_splitter.split_documents(data)
59
-
60
- # Use local embeddings (no Hugging Face API token)
61
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
62
- vectorstore_openai = FAISS.from_documents(docs, embeddings)
63
- main_placeholder.text("Embedding Vector Started Building...✅✅✅")
64
- time.sleep(2)
65
-
66
- save_faiss_index(vectorstore_openai, faiss_index_path)
67
- except Exception as e:
68
- main_placeholder.error(f"Error processing URLs: {str(e)}")
69
-
70
- query = main_placeholder.text_input("Question: ")
71
- if query:
72
- if os.path.exists(faiss_index_path):
73
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
75
- vectorstore = load_faiss_index(faiss_index_path, embeddings)
76
- chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
77
- result = chain({"question": query}, return_only_outputs=True)
78
-
79
- st.header("Answer")
80
- st.write(result["answer"])
81
-
82
- sources = result.get("sources", "")
83
- if sources:
84
- st.subheader("Sources:")
85
- sources_list = sources.split("\n")
86
- for source in sources_list:
87
- st.write(source)
88
  except Exception as e:
89
- main_placeholder.error(f"Error answering query: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from langchain_community.document_loaders.url import UnstructuredURLLoader
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.vectorstores.faiss import FAISS
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  import os
 
9
  from langchain_groq import ChatGroq
10
  from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
11
 
12
+ # Load environment variables (optional)
13
  load_dotenv()
14
 
15
  # Hardcoded Groq API key (NOT RECOMMENDED for production)
 
19
  st.title("News Research Tool 📈")
20
  st.sidebar.title("News Article URLs")
21
 
22
+ # Initialize session state for FAISS index
23
+ if "index_created" not in st.session_state:
24
+ st.session_state.index_created = False
25
+
26
  # Get URLs from user input
27
  urls = []
28
  for i in range(3):
29
  url = st.sidebar.text_input(f"URL {i+1}")
30
+ if url:
31
+ urls.append(url)
32
 
33
  # Button to process URLs
34
  process_url_clicked = st.sidebar.button("Process URLs")
 
50
  return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
51
 
52
  if process_url_clicked:
53
+ if not urls:
54
+ main_placeholder.error("Please provide at least one valid URL.")
55
+ else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  try:
57
+ main_placeholder.text("Data Loading...Started...✅✅✅")
58
+ loader = UnstructuredURLLoader(urls=urls)
59
+ data = loader.load()
60
+
61
+ # Debug: Check loaded data
62
+ if not data:
63
+ main_placeholder.error("No content loaded from URLs. Try different URLs.")
64
+ st.stop()
65
+
66
+ main_placeholder.text("Text Splitter...Started...✅✅✅")
67
+ text_splitter = RecursiveCharacterTextSplitter(
68
+ separators=['\n\n', '\n', '.', ','],
69
+ chunk_size=1000
70
+ )
71
+ docs = text_splitter.split_documents(data)
72
+
73
+ # Debug: Check document count
74
+ main_placeholder.text(f"Split into {len(docs)} document chunks.")
75
+
76
+ main_placeholder.text("Embedding Vector Started Building...✅✅✅")
77
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
78
+ vectorstore_openai = FAISS.from_documents(docs, embeddings)
79
+
80
+ save_faiss_index(vectorstore_openai, faiss_index_path)
81
+ st.session_state.index_created = True
82
+ main_placeholder.text("FAISS index saved successfully! ✅✅✅")
83
+ time.sleep(2)
84
+ main_placeholder.empty() # Clear status messages
 
 
 
 
 
 
85
  except Exception as e:
86
+ main_placeholder.error(f"Error processing URLs: {str(e)}")
87
+
88
+ query = main_placeholder.text_input("Question: ")
89
+ if query:
90
+ if not st.session_state.index_created or not os.path.exists(faiss_index_path):
91
+ main_placeholder.error("No FAISS index found. Please process URLs first.")
92
+ else:
93
+ with st.spinner("Processing your question..."):
94
+ try:
95
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
96
+ vectorstore = load_faiss_index(faiss_index_path, embeddings)
97
+ chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
98
+ result = chain({"question": query}, return_only_outputs=True)
99
+
100
+ # Debug: Check result
101
+ if not result.get("answer"):
102
+ main_placeholder.warning("No answer generated. Try a different question or URLs.")
103
+ st.stop()
104
+
105
+ st.header("Answer")
106
+ st.write(result["answer"])
107
+
108
+ sources = result.get("sources", "")
109
+ if sources:
110
+ st.subheader("Sources:")
111
+ sources_list = sources.split("\n")
112
+ for source in sources_list:
113
+ st.write(source)
114
+ else:
115
+ st.write("No sources found.")
116
+ except Exception as e:
117
+ main_placeholder.error(f"Error answering query: {str(e)}")