SURESHBEEKHANI committed on
Commit
b835248
·
verified ·
1 Parent(s): 5cc0a3b

Update app/app.py

Browse files
Files changed (1) hide show
  1. app/app.py +111 -101
app/app.py CHANGED
@@ -1,149 +1,159 @@
1
  # Import necessary libraries
2
  import os # This library helps us work with files and folders on the computer.
3
- import streamlit as st # This library helps us create a web app easily..
4
- from dotenv import load_dotenv # This library loads secret information from a special file.
5
- from langchain.chains import RetrievalQAWithSourcesChain # This helps us answer questions and show where the answers come from.
6
- from langchain.text_splitter import RecursiveCharacterTextSplitter # This helps us break long texts into smaller parts.
7
- from langchain.document_loaders import WebBaseLoader # This helps us grab text from web pages.
8
- from langchain_groq import ChatGroq # This is a special model that can chat and answer questions.
9
- from langchain_google_genai import GoogleGenerativeAIEmbeddings # This creates special math objects called "embeddings" to help find answers.
10
- from langchain.vectorstores import FAISS # This helps us store and search through our text easily.
11
- import logging # This helps us keep track of any problems that happen in our code.
12
-
13
- # Setup logging
14
- logging.basicConfig(level=logging.INFO) # We set up logging to show important messages.
 
 
15
 
16
  # Load environment variables from the .env file
17
- load_dotenv() # This loads secret information, like API keys, from a special file called .env.
18
- api_key = os.getenv("GROQ_API_KEY") # We get the secret key we need to use the ChatGroq model.
19
- api_key_google = os.getenv("GOOGLE_API_KEY")
20
 
21
- # Initialize the ChatGroq model
22
  def initialize_model(api_key: str) -> ChatGroq:
23
  """This function sets up the ChatGroq model using the secret key."""
24
  return ChatGroq(
25
- model="mixtral-8x7b-32768", # This is the name of the chat model we want to use.
26
- temperature=0, # This controls how random the responses are (0 means more predictable).
27
- max_tokens=None, # This allows us to have as many words as we want in the response.
28
- timeout=None, # This means we won't set a time limit for how long to wait for an answer.
29
- max_retries=5, # This allows the program to try getting an answer up to 5 times if it fails.
30
- api_key=api_key # We use our secret key to access the model.
31
  )
32
 
33
- # Load and process the URL
34
  def load_url_data(url: str) -> list:
35
  """This function gets the text from a web page at the given URL."""
36
- loader = WebBaseLoader([url]) # We create a loader to grab text from the URL.
37
- return loader.load() # We load and return the content from that web page.
38
 
39
- # Split documents into chunks
40
  def split_documents(data: list) -> list:
41
  """This function breaks long pieces of text into smaller, manageable chunks."""
42
  text_splitter = RecursiveCharacterTextSplitter(
43
- chunk_size=1500, # Each piece of text will be no longer than 1500 characters.
44
- separators=["\n", "\n\n", " ", ""], # These are the ways we can split the text.
45
- chunk_overlap=20 # Each piece will overlap with the next by 20 characters.
46
  )
47
- return text_splitter.split_documents(data) # We split the text and return the smaller pieces.
48
 
49
- # Create embeddings and save FAISS index
50
  def create_faiss_index(docs: list, index_path: str) -> None:
51
- """This function makes math objects (embeddings) from the text and saves them."""
52
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001") # We create embeddings using a specific model.
53
- vectorstore = FAISS.from_documents(docs, embeddings) # We make a searchable index from the documents and embeddings.
54
- vectorstore.save_local(index_path) # We save this index on our computer.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  # Main application logic
57
  def main() -> None:
58
  """This is where our main app runs and does all the work."""
59
 
60
- # Custom CSS for font size
61
  st.markdown(
62
  """
63
  <style>
64
- .stTitle { font-size: 40px; }
65
- .stSidebar { font-size: 20px; }
66
- .stTextInput, .stButton { font-size: 18px; }
67
- .stTextArea { font-size: 16px; }
68
  </style>
69
  """,
70
- unsafe_allow_html=True
71
  )
72
 
73
- st.title("News Research Tool πŸ“ˆ") # We set the title of the app.
74
- #st.sidebar.image("img/news.png", width=150) # We show a picture in the sidebar.
75
- st.sidebar.title("News Article URL") # We set a title in the sidebar for the URL input.
76
 
77
- url = st.sidebar.text_input("Enter the news article URL πŸ“°") # We ask the user to input a web page URL.
78
- process_url_clicked = st.sidebar.button("Process URL πŸš€") # We add a button to start processing the URL.
79
- index_path = "faiss_index" # This is the name of the file where we will save our index.
80
- main_placeholder = st.empty() # This creates a space where we can show different messages later.
81
 
82
- # Initialize chat history in session state
83
- if "chat_history" not in st.session_state: # Check if we already have chat history saved.
84
- st.session_state.chat_history = [] # If not, we create a new list to store it.
 
 
85
 
86
- llm = initialize_model(api_key) # We set up our chat model using the secret key.
87
 
88
- if process_url_clicked: # If the button to process the URL is clicked:
89
- if not url: # Check if the user didn't enter a URL.
90
  st.sidebar.error("Please provide a URL πŸ˜”") # Show an error message.
91
- elif not (url.startswith("http://") or url.startswith("https://")): # Check if the URL is in the right format.
92
- st.sidebar.error("Invalid URL format 🚫. Please include 'http://' or 'https://' πŸ˜•.") # Show an error if not valid.
93
  else: # If the URL is valid:
94
  try:
95
- with st.spinner("Loading data from URL... πŸ€”"): # Show a loading message while we wait.
96
- data = load_url_data(url) # Get the text from the web page.
97
 
98
- with st.spinner("Splitting text into chunks... βœ‚οΈ"): # Show another loading message.
99
- docs = split_documents(data) # Break the text into smaller pieces.
100
 
101
- with st.spinner("Creating embeddings... πŸ”"): # Show another loading message.
102
- create_faiss_index(docs, index_path) # Create embeddings and save the index.
 
103
 
104
- st.success("Data processed and index saved! You can now ask questions πŸ˜ƒ") # Show a success message.
105
 
106
- except Exception as e: # If something goes wrong:
107
- logging.error(f"Error processing URL: {e}") # Log the error for us to check later.
108
- st.sidebar.error(f"Error processing URL: {e} 😞") # Show an error in the sidebar.
109
  st.error("Error occurred while processing the URL. Please try again. πŸ˜“") # Show a general error message.
110
 
111
- # Asking questions based on processed data
112
- query = main_placeholder.chat_input("Ask a question from your news URL:") # Ask the user for a question about the news.
113
-
114
- if query: # If the user has asked a question:
115
- try:
116
- if os.path.exists(index_path): # Check if our saved index exists.
117
- with st.spinner("Loading the FAISS index... πŸ—‚οΈ"): # Show a loading message.
118
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001",api_key=api_key_google) # Set up embeddings again.
119
- vectorstore = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True) # Load our saved index.
120
-
121
- with st.spinner("Retrieving answer... πŸ€–πŸ’‘"): # Show a loading message.
122
- chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(k=5)) # Set up a question-answering system.
123
- result = chain({"question": query}, return_only_outputs=True) # Get the answer to the user's question.
124
-
125
- # Store the question and answer in chat history
126
- st.session_state.chat_history.append({"question": query, "answer": result["answer"]}) # Save the question and answer in chat history.
127
-
128
- for entry in st.session_state.chat_history: # Go through each entry in chat history:
129
- st.write(f"**Q:** {entry['question']}") # Show the question.
130
- st.write(f"**A:** {entry['answer']}") # Show the answer.
131
-
132
- # Display sources if available
133
- sources = result.get("sources", "") # Get the sources for the answer.
134
- if sources: # If there are sources:
135
- st.subheader("Sources πŸ“:") # Show a subheader for sources.
136
- sources_list = sources.split("\n") # Split the sources by new lines.
137
- for source in sources_list: # Go through each source:
138
- st.write(source) # Show each source.
139
-
140
  else:
141
- st.error("No index found. Please process a URL first πŸ˜”.") # Show an error if the index doesn't exist.
 
 
142
 
143
- except Exception as e: # If something goes wrong while retrieving the answer:
144
- logging.error(f"Error retrieving answer: {e}") # Log the error.
145
- st.error(f"Error retrieving answer: {e} 😒") # Show an error message.
146
 
147
  # Run the main application function
148
- if __name__ == "__main__": # Check if this file is being run directly:
149
- main() # Run the main function to start the app.
 
1
  # Import necessary libraries
2
  import os # This library helps us work with files and folders on the computer.
3
+ import streamlit as st # Streamlit is a framework that helps us create interactive web apps.
4
+ from dotenv import load_dotenv # This library loads environment variables from a .env file, used for configuration.
5
+ from langchain.chains import RetrievalQAWithSourcesChain # A component that allows us to answer questions based on retrieved documents.
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter # Used to split large texts into smaller, manageable parts.
7
+ from langchain.document_loaders import WebBaseLoader # This is used to fetch text from web pages.
8
+ from langchain_groq import ChatGroq # A specific chat model for generating responses.
9
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings # This creates embeddings for text to help with search and retrieval.
10
+ from langchain.vectorstores import FAISS # A library for storing and searching vector representations of documents.
11
+ from langchain_core.prompts import ChatPromptTemplate # For crafting prompts for chat interactions.
12
+ from langchain_core.messages import AIMessage, HumanMessage # To manage messages exchanged in the chat interface.
13
+ import logging # This library helps track errors and important events in the application.
14
+
15
# Configure logging so informative messages and errors are recorded.
logging.basicConfig(level=logging.INFO)

# Pull configuration from the local .env file (may contain API keys).
load_dotenv()
api_key = os.getenv("GROQ_API_KEY")  # Credential for the ChatGroq chat model.
api_key_google = os.getenv("GOOGLE_API_KEY")  # Credential for Google embeddings (if needed).
22
 
23
# Factory for the ChatGroq chat model
def initialize_model(api_key: str) -> ChatGroq:
    """Build and return a ChatGroq client authenticated with *api_key*."""
    model_settings = {
        "model": "mixtral-8x7b-32768",   # Model identifier served by Groq.
        "temperature": 0,                # Deterministic (least random) output.
        "max_tokens": None,              # No cap on response length.
        "timeout": None,                 # No request timeout.
        "max_retries": 5,                # Retry failed requests up to 5 times.
        "api_key": api_key,              # Credential used for authentication.
    }
    return ChatGroq(**model_settings)
34
 
35
# Fetch page content for a single URL
def load_url_data(url: str) -> list:
    """Fetch and return the document(s) extracted from the web page at *url*."""
    # WebBaseLoader expects a list of URLs; we wrap the single URL.
    return WebBaseLoader([url]).load()
40
 
41
# Chunk long documents for embedding
def split_documents(data: list) -> list:
    """Split loaded documents into overlapping chunks suitable for embedding."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,                        # Hard cap of 1500 characters per chunk.
        separators=["\n", "\n\n", " ", ""],     # Preferred split points, tried in order.
        chunk_overlap=20,                       # 20-character overlap preserves context.
    )
    return splitter.split_documents(data)
50
 
51
# Function to create embeddings and save them in a FAISS index
def create_faiss_index(docs: list, index_path: str) -> FAISS:
    """Embed *docs*, persist the FAISS index at *index_path*, and return it.

    FIX: the original annotated the return type as ``None`` even though the
    function ends with ``return vectorstore``; the annotation now matches the
    actual behavior (the caller stores the returned index in session state).
    """
    # NOTE(review): no explicit API key is passed here, so the embeddings
    # client presumably falls back to the GOOGLE_API_KEY environment variable
    # loaded by load_dotenv() — confirm against deployment configuration.
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vectorstore = FAISS.from_documents(docs, embeddings)  # Build a searchable vector index.
    vectorstore.save_local(index_path)  # Persist the index to disk for later reuse.
    return vectorstore  # Returned so the caller can keep it in memory too.
58
+
59
# Function to generate a response from the AI based on user input
def get_response(user_query: str, vectorstore, chat_history, llm) -> str:
    """Return the model's answer to *user_query*, grounded in retrieved docs.

    Parameters:
        user_query: the question typed by the user.
        vectorstore: FAISS store whose documents ground the answer.
        chat_history: accepted for interface compatibility; not currently
            used by this implementation.
        llm: the chat model that generates the answer.

    FIX: removed a ChatPromptTemplate that was constructed but never passed
    to the chain (dead code), and moved the top-k setting into
    ``search_kwargs`` — ``as_retriever(k=5)`` does not configure the
    retriever's top-k (``k`` is not a retriever field).
    """
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})  # Retrieve up to 5 relevant chunks.
    chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
    result = chain({"question": user_query}, return_only_outputs=True)  # Run retrieval + generation.
    return result["answer"]  # Only the answer text is surfaced to the UI.
72
 
73
# Main application logic
def main() -> None:
    """Run the Streamlit news-research app: process a URL, then chat over it."""

    # Custom CSS for the app's appearance.
    # FIX: the original embedded Python-style '#' comments inside the <style>
    # block; '#' is not a CSS comment delimiter, so browsers treat those lines
    # as malformed rules. Plain CSS is emitted instead.
    st.markdown(
        """
        <style>
        .stTitle { font-size: 40px; }
        .stSidebar { font-size: 20px; }
        .stTextInput, .stButton { font-size: 18px; }
        .stTextArea { font-size: 16px; }
        </style>
        """,
        unsafe_allow_html=True,  # Required so the raw <style> tag is rendered.
    )

    st.title("News Research Tool 📈")        # Main title of the application.
    st.sidebar.title("News Article URL")     # Title above the sidebar input.

    url = st.sidebar.text_input("Enter the news article URL 📰")   # URL to analyze.
    process_url_clicked = st.sidebar.button("Process URL 🚀")      # Trigger processing.
    index_path = "faiss_index"   # Directory name for the saved FAISS index.
    main_placeholder = st.empty()  # Reserved slot in the main area (kept for layout).

    # Initialize chat history and vectorstore in session state so they
    # survive Streamlit reruns.
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "vectorstore" not in st.session_state:
        st.session_state.vectorstore = None

    llm = initialize_model(api_key)  # Chat model used for answering questions.

    if process_url_clicked:
        if not url:  # No URL entered at all.
            st.sidebar.error("Please provide a URL 😔")
        elif not (url.startswith("http://") or url.startswith("https://")):
            st.sidebar.error("Invalid URL format 🚫. Please include 'http://' or 'https://' 😕.")
        else:
            try:
                with st.spinner("Loading data from URL... 🤔"):
                    data = load_url_data(url)  # Fetch the article text.

                with st.spinner("Splitting text into chunks... ✂️"):
                    docs = split_documents(data)  # Chunk for embedding.

                with st.spinner("Creating embeddings... 🔍"):
                    vectorstore = create_faiss_index(docs, index_path)  # Embed and persist.
                    st.session_state.vectorstore = vectorstore  # Keep in memory for queries.

                st.success("Data processed and index saved! You can now ask questions 😃")

            except Exception as e:  # Broad catch at the UI boundary: log and surface.
                logging.error(f"Error processing URL: {e}")
                st.sidebar.error(f"Error processing URL: {e} 😞")
                st.error("Error occurred while processing the URL. Please try again. 😓")

    # Replay the stored conversation (both AI and user messages).
    for message in st.session_state.chat_history:
        if isinstance(message, AIMessage):
            with st.chat_message("AI"):
                st.markdown(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("Human"):
                st.markdown(message.content)

    # Input field for the user's next question.
    user_query = st.chat_input("Type a message...")
    if user_query and user_query.strip():  # Ignore empty / whitespace-only input.
        st.session_state.chat_history.append(HumanMessage(content=user_query))

        with st.chat_message("Human"):
            st.markdown(user_query)  # Echo the user's message.

        with st.chat_message("AI"):
            # FIX: explicit None check — avoids relying on the vectorstore
            # object's truthiness, which is not a documented contract.
            if st.session_state.vectorstore is not None:
                response = get_response(user_query, st.session_state.vectorstore,
                                        st.session_state.chat_history, llm)
            else:
                response = "Please process a URL first before asking questions."
            st.markdown(response)  # Show the AI's response.

        st.session_state.chat_history.append(AIMessage(content=response))


# Run the main application function
if __name__ == "__main__":  # Only start the app when run as the main script.
    main()